summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKamil Trzciński <ayufan@ayufan.eu>2019-02-25 15:41:52 +0100
committerKamil Trzciński <ayufan@ayufan.eu>2019-03-15 14:38:28 +0100
commitb22287f00fc10800486510c64139b4fefb38ac4c (patch)
treeabcc545f4dafe74b9338a351dc3e095b1c82bef8
parent80fea82f3ab6afd486884020710eb01c06b048d9 (diff)
downloadgitlab-ce-b22287f00fc10800486510c64139b4fefb38ac4c.tar.gz
Make CI refs matching to to use UntrustedRegexp
This makes ref validation to use always `UntrustedRegexp`. This also splits the existing RubySyntax into separate class.
-rw-r--r--changelogs/unreleased/use-untrusted-regexp.yml5
-rw-r--r--doc/ci/yaml/README.md13
-rw-r--r--lib/gitlab/ci/build/policy/refs.rb4
-rw-r--r--lib/gitlab/ci/pipeline/expression/lexeme/pattern.rb4
-rw-r--r--lib/gitlab/config/entry/legacy_validation_helpers.rb8
-rw-r--r--lib/gitlab/config/entry/validators.rb14
-rw-r--r--lib/gitlab/untrusted_regexp.rb35
-rw-r--r--lib/gitlab/untrusted_regexp/ruby_syntax.rb43
-rw-r--r--spec/lib/gitlab/ci/build/policy/refs_spec.rb13
-rw-r--r--spec/lib/gitlab/ci/pipeline/expression/lexeme/pattern_spec.rb2
-rw-r--r--spec/lib/gitlab/ci/trace/stream_spec.rb2
-rw-r--r--spec/lib/gitlab/route_map_spec.rb2
-rw-r--r--spec/lib/gitlab/untrusted_regexp/ruby_syntax_spec.rb72
-rw-r--r--spec/lib/gitlab/untrusted_regexp_spec.rb74
-rw-r--r--spec/support/shared_examples/malicious_regexp_shared_examples.rb3
15 files changed, 198 insertions, 96 deletions
diff --git a/changelogs/unreleased/use-untrusted-regexp.yml b/changelogs/unreleased/use-untrusted-regexp.yml
new file mode 100644
index 00000000000..dd7f1bcaca1
--- /dev/null
+++ b/changelogs/unreleased/use-untrusted-regexp.yml
@@ -0,0 +1,5 @@
+---
+title: Use UntrustedRegexp for matching refs policy
+merge_request:
+author:
+type: security
diff --git a/doc/ci/yaml/README.md b/doc/ci/yaml/README.md
index aab26092811..251e2cd3d05 100644
--- a/doc/ci/yaml/README.md
+++ b/doc/ci/yaml/README.md
@@ -255,6 +255,19 @@ job:
- branches
```
+Pattern matching is case-sensitive by default. Use `i` flag modifier, like
+`/pattern/i` to make a pattern case-insensitive:
+
+```yaml
+job:
+ # use regexp
+ only:
+ - /^issue-.*$/i
+ # use special keyword
+ except:
+ - branches
+```
+
In this example, `job` will run only for refs that are tagged, or if a build is
explicitly requested via an API trigger or a [Pipeline Schedule][schedules]:
diff --git a/lib/gitlab/ci/build/policy/refs.rb b/lib/gitlab/ci/build/policy/refs.rb
index df5f5ffc253..360424bec11 100644
--- a/lib/gitlab/ci/build/policy/refs.rb
+++ b/lib/gitlab/ci/build/policy/refs.rb
@@ -35,8 +35,8 @@ module Gitlab
# patterns can be matched only when branch or tag is used
# the pattern matching does not work for merge requests pipelines
if pipeline.branch? || pipeline.tag?
- if pattern.first == "/" && pattern.last == "/"
- Regexp.new(pattern[1...-1]) =~ pipeline.ref
+ if regexp = Gitlab::UntrustedRegexp::RubySyntax.fabricate(pattern)
+ regexp.match?(pipeline.ref)
else
pattern == pipeline.ref
end
diff --git a/lib/gitlab/ci/pipeline/expression/lexeme/pattern.rb b/lib/gitlab/ci/pipeline/expression/lexeme/pattern.rb
index d7e6dacf068..2b719c9c6fc 100644
--- a/lib/gitlab/ci/pipeline/expression/lexeme/pattern.rb
+++ b/lib/gitlab/ci/pipeline/expression/lexeme/pattern.rb
@@ -13,13 +13,13 @@ module Gitlab
def initialize(regexp)
@value = regexp
- unless Gitlab::UntrustedRegexp.valid?(@value)
+ unless Gitlab::UntrustedRegexp::RubySyntax.valid?(@value)
raise Lexer::SyntaxError, 'Invalid regular expression!'
end
end
def evaluate(variables = {})
- Gitlab::UntrustedRegexp.fabricate(@value)
+ Gitlab::UntrustedRegexp::RubySyntax.fabricate!(@value)
rescue RegexpError
raise Expression::RuntimeError, 'Invalid regular expression!'
end
diff --git a/lib/gitlab/config/entry/legacy_validation_helpers.rb b/lib/gitlab/config/entry/legacy_validation_helpers.rb
index d3ab5625743..0a629075302 100644
--- a/lib/gitlab/config/entry/legacy_validation_helpers.rb
+++ b/lib/gitlab/config/entry/legacy_validation_helpers.rb
@@ -45,17 +45,15 @@ module Gitlab
end
def validate_regexp(value)
- !value.nil? && Regexp.new(value.to_s) && true
- rescue RegexpError, TypeError
- false
+ Gitlab::UntrustedRegexp::RubySyntax.valid?(value)
end
def validate_string_or_regexp(value)
return true if value.is_a?(Symbol)
return false unless value.is_a?(String)
- if value.first == '/' && value.last == '/'
- validate_regexp(value[1...-1])
+ if Gitlab::UntrustedRegexp::RubySyntax.matches_syntax?(value)
+ validate_regexp(value)
else
true
end
diff --git a/lib/gitlab/config/entry/validators.rb b/lib/gitlab/config/entry/validators.rb
index 25bfa50f829..d348e11b753 100644
--- a/lib/gitlab/config/entry/validators.rb
+++ b/lib/gitlab/config/entry/validators.rb
@@ -120,17 +120,13 @@ module Gitlab
private
- def look_like_regexp?(value)
- value.is_a?(String) && value.start_with?('/') &&
- value.end_with?('/')
+ def matches_syntax?(value)
+ Gitlab::UntrustedRegexp::RubySyntax.matches_syntax?(value)
end
def validate_regexp(value)
- look_like_regexp?(value) &&
- Regexp.new(value.to_s[1...-1]) &&
- true
- rescue RegexpError
- false
+ matches_syntax?(value) &&
+ Gitlab::UntrustedRegexp::RubySyntax.valid?(value)
end
end
@@ -149,7 +145,7 @@ module Gitlab
def validate_string_or_regexp(value)
return false unless value.is_a?(String)
- return validate_regexp(value) if look_like_regexp?(value)
+ return validate_regexp(value) if matches_syntax?(value)
true
end
diff --git a/lib/gitlab/untrusted_regexp.rb b/lib/gitlab/untrusted_regexp.rb
index ba1137313d8..14126b6ec06 100644
--- a/lib/gitlab/untrusted_regexp.rb
+++ b/lib/gitlab/untrusted_regexp.rb
@@ -35,6 +35,10 @@ module Gitlab
matches
end
+ def match?(text)
+ text.present? && scan(text).present?
+ end
+
def replace(text, rewrite)
RE2.Replace(text, regexp, rewrite)
end
@@ -43,37 +47,6 @@ module Gitlab
self.source == other.source
end
- # Handles regular expressions with the preferred RE2 library where possible
- # via UntustedRegex. Falls back to Ruby's built-in regular expression library
- # when the syntax would be invalid in RE2.
- #
- # One difference between these is `(?m)` multi-line mode. Ruby regex enables
- # this by default, but also handles `^` and `$` differently.
- # See: https://www.regular-expressions.info/modifiers.html
- def self.with_fallback(pattern, multiline: false)
- UntrustedRegexp.new(pattern, multiline: multiline)
- rescue RegexpError
- Regexp.new(pattern)
- end
-
- def self.valid?(pattern)
- !!self.fabricate(pattern)
- rescue RegexpError
- false
- end
-
- def self.fabricate(pattern)
- matches = pattern.match(%r{^/(?<regexp>.+)/(?<flags>[ismU]*)$})
-
- raise RegexpError, 'Invalid regular expression!' if matches.nil?
-
- expression = matches[:regexp]
- flags = matches[:flags]
- expression.prepend("(?#{flags})") if flags.present?
-
- self.new(expression, multiline: false)
- end
-
private
attr_reader :regexp
diff --git a/lib/gitlab/untrusted_regexp/ruby_syntax.rb b/lib/gitlab/untrusted_regexp/ruby_syntax.rb
new file mode 100644
index 00000000000..91f300f97d0
--- /dev/null
+++ b/lib/gitlab/untrusted_regexp/ruby_syntax.rb
@@ -0,0 +1,43 @@
+# frozen_string_literal: true
+
+module Gitlab
+ class UntrustedRegexp
+ # This class implements support for Ruby syntax of regexps
+ # and converts that to RE2 representation:
+ # /<regexp>/<flags>
+ class RubySyntax
+ PATTERN = %r{^/(?<regexp>.+)/(?<flags>[ismU]*)$}.freeze
+
+ # Checks if pattern matches a regexp pattern
+ # but does not enforce it's validity
+ def self.matches_syntax?(pattern)
+ pattern.is_a?(String) && pattern.match(PATTERN).present?
+ end
+
+ # The regexp can match the pattern `/.../`, but may not be fabricatable:
+ # it can be invalid or incomplete: `/match ( string/`
+ def self.valid?(pattern)
+ !!self.fabricate(pattern)
+ end
+
+ def self.fabricate(pattern)
+ self.fabricate!(pattern)
+ rescue RegexpError
+ nil
+ end
+
+ def self.fabricate!(pattern)
+ raise RegexpError, 'Pattern is not string!' unless pattern.is_a?(String)
+
+ matches = pattern.match(PATTERN)
+ raise RegexpError, 'Invalid regular expression!' if matches.nil?
+
+ expression = matches[:regexp]
+ flags = matches[:flags]
+ expression.prepend("(?#{flags})") if flags.present?
+
+ UntrustedRegexp.new(expression, multiline: false)
+ end
+ end
+ end
+end
diff --git a/spec/lib/gitlab/ci/build/policy/refs_spec.rb b/spec/lib/gitlab/ci/build/policy/refs_spec.rb
index b4ddbf89b70..ec0450643c3 100644
--- a/spec/lib/gitlab/ci/build/policy/refs_spec.rb
+++ b/spec/lib/gitlab/ci/build/policy/refs_spec.rb
@@ -92,10 +92,23 @@ describe Gitlab::Ci::Build::Policy::Refs do
.to be_satisfied_by(pipeline)
end
+ it 'is satisfied when case-insensitive regexp matches pipeline ref' do
+ expect(described_class.new(['/DOCS-.*/i']))
+ .to be_satisfied_by(pipeline)
+ end
+
it 'is not satisfied when regexp does not match pipeline ref' do
expect(described_class.new(['/fix-.*/']))
.not_to be_satisfied_by(pipeline)
end
end
+
+ context 'malicious regexp' do
+ let(:pipeline) { build_stubbed(:ci_pipeline, ref: malicious_text) }
+
+ subject { described_class.new([malicious_regexp_ruby]) }
+
+ include_examples 'malicious regexp'
+ end
end
end
diff --git a/spec/lib/gitlab/ci/pipeline/expression/lexeme/pattern_spec.rb b/spec/lib/gitlab/ci/pipeline/expression/lexeme/pattern_spec.rb
index 3ebc2e94727..cff7f57ceff 100644
--- a/spec/lib/gitlab/ci/pipeline/expression/lexeme/pattern_spec.rb
+++ b/spec/lib/gitlab/ci/pipeline/expression/lexeme/pattern_spec.rb
@@ -85,7 +85,7 @@ describe Gitlab::Ci::Pipeline::Expression::Lexeme::Pattern do
end
it 'raises error if evaluated regexp is not valid' do
- allow(Gitlab::UntrustedRegexp).to receive(:valid?).and_return(true)
+ allow(Gitlab::UntrustedRegexp::RubySyntax).to receive(:valid?).and_return(true)
regexp = described_class.new('/invalid ( .*/')
diff --git a/spec/lib/gitlab/ci/trace/stream_spec.rb b/spec/lib/gitlab/ci/trace/stream_spec.rb
index 38626f728d7..e45ea1c2528 100644
--- a/spec/lib/gitlab/ci/trace/stream_spec.rb
+++ b/spec/lib/gitlab/ci/trace/stream_spec.rb
@@ -414,7 +414,7 @@ describe Gitlab::Ci::Trace::Stream, :clean_gitlab_redis_cache do
context 'malicious regexp' do
let(:data) { malicious_text }
- let(:regex) { malicious_regexp }
+ let(:regex) { malicious_regexp_re2 }
include_examples 'malicious regexp'
end
diff --git a/spec/lib/gitlab/route_map_spec.rb b/spec/lib/gitlab/route_map_spec.rb
index d672f7b5675..a39c774429e 100644
--- a/spec/lib/gitlab/route_map_spec.rb
+++ b/spec/lib/gitlab/route_map_spec.rb
@@ -60,7 +60,7 @@ describe Gitlab::RouteMap do
subject do
map = described_class.new(<<-"MAP".strip_heredoc)
- - source: '#{malicious_regexp}'
+ - source: '#{malicious_regexp_re2}'
public: '/'
MAP
diff --git a/spec/lib/gitlab/untrusted_regexp/ruby_syntax_spec.rb b/spec/lib/gitlab/untrusted_regexp/ruby_syntax_spec.rb
new file mode 100644
index 00000000000..005d41580de
--- /dev/null
+++ b/spec/lib/gitlab/untrusted_regexp/ruby_syntax_spec.rb
@@ -0,0 +1,72 @@
+require 'fast_spec_helper'
+require 'support/shared_examples/malicious_regexp_shared_examples'
+
+describe Gitlab::UntrustedRegexp::RubySyntax do
+ describe '.matches_syntax?' do
+ it 'returns true if regexp is valid' do
+ expect(described_class.matches_syntax?('/some .* thing/'))
+ .to be true
+ end
+
+ it 'returns true if regexp is invalid, but resembles regexp' do
+ expect(described_class.matches_syntax?('/some ( thing/'))
+ .to be true
+ end
+ end
+
+ describe '.valid?' do
+ it 'returns true if regexp is valid' do
+ expect(described_class.valid?('/some .* thing/'))
+ .to be true
+ end
+
+ it 'returns false if regexp is invalid' do
+ expect(described_class.valid?('/some ( thing/'))
+ .to be false
+ end
+ end
+
+ describe '.fabricate' do
+ context 'when regexp is valid' do
+ it 'fabricates regexp without flags' do
+ expect(described_class.fabricate('/some .* thing/')).not_to be_nil
+ end
+ end
+
+ context 'when regexp is a raw pattern' do
+ it 'returns error' do
+ expect(described_class.fabricate('some .* thing')).to be_nil
+ end
+ end
+ end
+
+ describe '.fabricate!' do
+ context 'when regexp is using /regexp/ scheme with flags' do
+ it 'fabricates regexp with a single flag' do
+ regexp = described_class.fabricate!('/something/i')
+
+ expect(regexp).to eq Gitlab::UntrustedRegexp.new('(?i)something')
+ expect(regexp.scan('SOMETHING')).to be_one
+ end
+
+ it 'fabricates regexp with multiple flags' do
+ regexp = described_class.fabricate!('/something/im')
+
+ expect(regexp).to eq Gitlab::UntrustedRegexp.new('(?im)something')
+ end
+
+ it 'fabricates regexp without flags' do
+ regexp = described_class.fabricate!('/something/')
+
+ expect(regexp).to eq Gitlab::UntrustedRegexp.new('something')
+ end
+ end
+
+ context 'when regexp is a raw pattern' do
+ it 'raises an error' do
+ expect { described_class.fabricate!('some .* thing') }
+ .to raise_error(RegexpError)
+ end
+ end
+ end
+end
diff --git a/spec/lib/gitlab/untrusted_regexp_spec.rb b/spec/lib/gitlab/untrusted_regexp_spec.rb
index 0a6ac0aa294..9d483f13a5e 100644
--- a/spec/lib/gitlab/untrusted_regexp_spec.rb
+++ b/spec/lib/gitlab/untrusted_regexp_spec.rb
@@ -2,48 +2,6 @@ require 'fast_spec_helper'
require 'support/shared_examples/malicious_regexp_shared_examples'
describe Gitlab::UntrustedRegexp do
- describe '.valid?' do
- it 'returns true if regexp is valid' do
- expect(described_class.valid?('/some ( thing/'))
- .to be false
- end
-
- it 'returns true if regexp is invalid' do
- expect(described_class.valid?('/some .* thing/'))
- .to be true
- end
- end
-
- describe '.fabricate' do
- context 'when regexp is using /regexp/ scheme with flags' do
- it 'fabricates regexp with a single flag' do
- regexp = described_class.fabricate('/something/i')
-
- expect(regexp).to eq described_class.new('(?i)something')
- expect(regexp.scan('SOMETHING')).to be_one
- end
-
- it 'fabricates regexp with multiple flags' do
- regexp = described_class.fabricate('/something/im')
-
- expect(regexp).to eq described_class.new('(?im)something')
- end
-
- it 'fabricates regexp without flags' do
- regexp = described_class.fabricate('/something/')
-
- expect(regexp).to eq described_class.new('something')
- end
- end
-
- context 'when regexp is a raw pattern' do
- it 'raises an error' do
- expect { described_class.fabricate('some .* thing') }
- .to raise_error(RegexpError)
- end
- end
- end
-
describe '#initialize' do
subject { described_class.new(pattern) }
@@ -92,11 +50,41 @@ describe Gitlab::UntrustedRegexp do
end
end
+ describe '#match?' do
+ subject { described_class.new(regexp).match?(text) }
+
+ context 'malicious regexp' do
+ let(:text) { malicious_text }
+ let(:regexp) { malicious_regexp_re2 }
+
+ include_examples 'malicious regexp'
+ end
+
+ context 'matching regexp' do
+ let(:regexp) { 'foo' }
+ let(:text) { 'foo' }
+
+ it 'returns an array of nil matches' do
+ is_expected.to eq(true)
+ end
+ end
+
+ context 'non-matching regexp' do
+ let(:regexp) { 'boo' }
+ let(:text) { 'foo' }
+
+ it 'returns an array of nil matches' do
+ is_expected.to eq(false)
+ end
+ end
+ end
+
describe '#scan' do
subject { described_class.new(regexp).scan(text) }
+
context 'malicious regexp' do
let(:text) { malicious_text }
- let(:regexp) { malicious_regexp }
+ let(:regexp) { malicious_regexp_re2 }
include_examples 'malicious regexp'
end
diff --git a/spec/support/shared_examples/malicious_regexp_shared_examples.rb b/spec/support/shared_examples/malicious_regexp_shared_examples.rb
index db69b75c0c8..a86050e2cf2 100644
--- a/spec/support/shared_examples/malicious_regexp_shared_examples.rb
+++ b/spec/support/shared_examples/malicious_regexp_shared_examples.rb
@@ -2,7 +2,8 @@ require 'timeout'
shared_examples 'malicious regexp' do
let(:malicious_text) { 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa!' }
- let(:malicious_regexp) { '(?i)^(([a-z])+.)+[A-Z]([a-z])+$' }
+ let(:malicious_regexp_re2) { '(?i)^(([a-z])+.)+[A-Z]([a-z])+$' }
+ let(:malicious_regexp_ruby) { '/^(([a-z])+.)+[A-Z]([a-z])+$/i' }
it 'takes under a second' do
expect { Timeout.timeout(1) { subject } }.not_to raise_error