summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDouwe Maan <douwe@gitlab.com>2017-06-14 14:58:52 +0000
committerJames Edwards-Jones <jedwardsjones@gitlab.com>2017-07-19 16:43:55 +0100
commit5ba0577326c4f7b91bf6cfd1fde29a90ff11561d (patch)
treeaa4f8f973726ed1bed05f066c24d72ac6b10a98c
parent4c9da11444df4a83fc228591d8e920ec046bc45d (diff)
downloadgitlab-ce-5ba0577326c4f7b91bf6cfd1fde29a90ff11561d.tar.gz
Merge branch '24570-use-re2-for-user-supplied-regexp-9-0' into 'security-9-0'
[security-9-0] Use re2 for user-supplied regexps See merge request !2122
-rw-r--r--Gemfile3
-rw-r--r--Gemfile.lock2
-rw-r--r--app/models/ci/build.rb3
-rw-r--r--lib/gitlab/route_map.rb8
-rw-r--r--lib/gitlab/untrusted_regexp.rb53
-rw-r--r--spec/lib/gitlab/route_map_spec.rb13
-rw-r--r--spec/lib/gitlab/untrusted_regexp_spec.rb80
-rw-r--r--spec/models/ci/build_spec.rb24
-rw-r--r--spec/support/malicious_regexp_shared_examples.rb8
9 files changed, 186 insertions, 8 deletions
diff --git a/Gemfile b/Gemfile
index 2f813324a35..61a9fe4b958 100644
--- a/Gemfile
+++ b/Gemfile
@@ -153,6 +153,9 @@ gem 'rainbow', '~> 2.1.0'
# GitLab settings
gem 'settingslogic', '~> 2.0.9'
+# Linear-time regex library for untrusted regular expressions
+gem 're2', '~> 1.0.0'
+
# Misc
gem 'version_sorter', '~> 2.1.0'
diff --git a/Gemfile.lock b/Gemfile.lock
index fd44d4973ab..9b1e8e4fedb 100644
--- a/Gemfile.lock
+++ b/Gemfile.lock
@@ -591,6 +591,7 @@ GEM
debugger-ruby_core_source (~> 1.3)
rdoc (4.2.2)
json (~> 1.4)
+ re2 (1.0.0)
recaptcha (3.0.0)
json
recursive-open-struct (1.0.0)
@@ -968,6 +969,7 @@ DEPENDENCIES
rainbow (~> 2.1.0)
rblineprof (~> 0.3.6)
rdoc (~> 4.2)
+ re2 (~> 1.0.0)
recaptcha (~> 3.0)
redcarpet (~> 3.4)
redis (~> 3.2)
diff --git a/app/models/ci/build.rb b/app/models/ci/build.rb
index 8431c5f228c..8f64d87d635 100644
--- a/app/models/ci/build.rb
+++ b/app/models/ci/build.rb
@@ -251,7 +251,8 @@ module Ci
def extract_coverage(text, regex)
return unless regex
- matches = text.scan(Regexp.new(regex)).last
+ regex = Gitlab::UntrustedRegexp.new(regex)
+ matches = regex.scan(text).last
matches = matches.last if matches.is_a?(Array)
coverage = matches.gsub(/\d+(\.\d+)?/).first
diff --git a/lib/gitlab/route_map.rb b/lib/gitlab/route_map.rb
index 36791fae60f..76cde2c9e96 100644
--- a/lib/gitlab/route_map.rb
+++ b/lib/gitlab/route_map.rb
@@ -18,7 +18,11 @@ module Gitlab
mapping = @map.find { |mapping| mapping[:source] === path }
return unless mapping
- path.sub(mapping[:source], mapping[:public])
+ if mapping[:source].is_a?(String)
+ path.sub(mapping[:source], mapping[:public])
+ else
+ mapping[:source].replace(path, mapping[:public])
+ end
end
private
@@ -35,7 +39,7 @@ module Gitlab
source_pattern = source_pattern[1...-1].gsub('\/', '/')
begin
- source_pattern = /\A#{source_pattern}\z/
+ source_pattern = Gitlab::UntrustedRegexp.new('\A' + source_pattern + '\z')
rescue RegexpError => e
raise FormatError, "Route map entry source is not a valid regular expression: #{e}"
end
diff --git a/lib/gitlab/untrusted_regexp.rb b/lib/gitlab/untrusted_regexp.rb
new file mode 100644
index 00000000000..8b43f0053d6
--- /dev/null
+++ b/lib/gitlab/untrusted_regexp.rb
@@ -0,0 +1,53 @@
+module Gitlab
+ # An untrusted regular expression is any regexp containing patterns sourced
+ # from user input.
+ #
+ # Ruby's built-in regular expression library allows patterns which complete in
+ # exponential time, permitting denial-of-service attacks.
+ #
+ # Not all regular expression features are available in untrusted regexes, and
+ # there is a strict limit on total execution time. See the RE2 documentation
+ # at https://github.com/google/re2/wiki/Syntax for more details.
+ class UntrustedRegexp
+ delegate :===, to: :regexp
+
+ def initialize(pattern)
+ @regexp = RE2::Regexp.new(pattern, log_errors: false)
+
+ raise RegexpError.new(regexp.error) unless regexp.ok?
+ end
+
+ def replace_all(text, rewrite)
+ RE2.GlobalReplace(text, regexp, rewrite)
+ end
+
+ def scan(text)
+ scan_regexp.scan(text).map do |match|
+ if regexp.number_of_capturing_groups == 0
+ match.first
+ else
+ match
+ end
+ end
+ end
+
+ def replace(text, rewrite)
+ RE2.Replace(text, regexp, rewrite)
+ end
+
+ private
+
+ attr_reader :regexp
+
+ # RE2 scan operates differently to Ruby scan when there are no capture
+ # groups, so work around it
+ def scan_regexp
+ @scan_regexp ||=
+ if regexp.number_of_capturing_groups == 0
+ RE2::Regexp.new('(' + regexp.source + ')')
+ else
+ regexp
+ end
+ end
+ end
+end
diff --git a/spec/lib/gitlab/route_map_spec.rb b/spec/lib/gitlab/route_map_spec.rb
index 2370f56a613..566266242d1 100644
--- a/spec/lib/gitlab/route_map_spec.rb
+++ b/spec/lib/gitlab/route_map_spec.rb
@@ -55,6 +55,19 @@ describe Gitlab::RouteMap, lib: true do
end
describe '#public_path_for_source_path' do
+ context 'malicious regexp' do
+ include_examples 'malicious regexp'
+
+ subject do
+ map = described_class.new(<<-"MAP".strip_heredoc)
+ - source: '#{malicious_regexp}'
+ public: '/'
+ MAP
+
+ map.public_path_for_source_path(malicious_text)
+ end
+ end
+
subject do
described_class.new(<<-'MAP'.strip_heredoc)
# Team data
diff --git a/spec/lib/gitlab/untrusted_regexp_spec.rb b/spec/lib/gitlab/untrusted_regexp_spec.rb
new file mode 100644
index 00000000000..66045917cb3
--- /dev/null
+++ b/spec/lib/gitlab/untrusted_regexp_spec.rb
@@ -0,0 +1,80 @@
+require 'spec_helper'
+
+describe Gitlab::UntrustedRegexp do
+ describe '#initialize' do
+ subject { described_class.new(pattern) }
+
+ context 'invalid regexp' do
+ let(:pattern) { '[' }
+
+ it { expect { subject }.to raise_error(RegexpError) }
+ end
+ end
+
+ describe '#replace_all' do
+ it 'replaces all instances of the match in a string' do
+ result = described_class.new('foo').replace_all('foo bar foo', 'oof')
+
+ expect(result).to eq('oof bar oof')
+ end
+ end
+
+ describe '#replace' do
+ it 'replaces the first instance of the match in a string' do
+ result = described_class.new('foo').replace('foo bar foo', 'oof')
+
+ expect(result).to eq('oof bar foo')
+ end
+ end
+
+ describe '#===' do
+ it 'returns true for a match' do
+ result = described_class.new('foo') === 'a foo here'
+
+ expect(result).to be_truthy
+ end
+
+ it 'returns false for no match' do
+ result = described_class.new('foo') === 'a bar here'
+
+ expect(result).to be_falsy
+ end
+ end
+
+ describe '#scan' do
+ subject { described_class.new(regexp).scan(text) }
+ context 'malicious regexp' do
+ let(:text) { malicious_text }
+ let(:regexp) { malicious_regexp }
+
+ include_examples 'malicious regexp'
+ end
+
+ context 'no capture group' do
+ let(:regexp) { '.+' }
+ let(:text) { 'foo' }
+
+ it 'returns the whole match' do
+ is_expected.to eq(['foo'])
+ end
+ end
+
+ context 'one capture group' do
+ let(:regexp) { '(f).+' }
+ let(:text) { 'foo' }
+
+ it 'returns the captured part' do
+ is_expected.to eq([%w[f]])
+ end
+ end
+
+ context 'two capture groups' do
+ let(:regexp) { '(f).(o)' }
+ let(:text) { 'foo' }
+
+ it 'returns the captured parts' do
+ is_expected.to eq([%w[f o]])
+ end
+ end
+ end
+end
diff --git a/spec/models/ci/build_spec.rb b/spec/models/ci/build_spec.rb
index 8dbcf50ee0c..9128c3aaabd 100644
--- a/spec/models/ci/build_spec.rb
+++ b/spec/models/ci/build_spec.rb
@@ -533,35 +533,49 @@ describe Ci::Build, :models do
end
describe '#extract_coverage' do
+ subject { build.extract_coverage(data, regex) }
+
context 'valid content & regex' do
- subject { build.extract_coverage('Coverage 1033 / 1051 LOC (98.29%) covered', '\(\d+.\d+\%\) covered') }
+ let(:data) { 'Coverage 1033 / 1051 LOC (98.29%) covered' }
+ let(:regex) { '\(\d+.\d+\%\) covered' }
it { is_expected.to eq(98.29) }
end
context 'valid content & bad regex' do
- subject { build.extract_coverage('Coverage 1033 / 1051 LOC (98.29%) covered', 'very covered') }
+ let(:data) { 'Coverage 1033 / 1051 LOC (98.29%) covered\n' }
+ let(:regex) { 'very covered' }
it { is_expected.to be_nil }
end
context 'no coverage content & regex' do
- subject { build.extract_coverage('No coverage for today :sad:', '\(\d+.\d+\%\) covered') }
+ let(:data) { 'No coverage for today :sad:' }
+ let(:regex) { '\(\d+.\d+\%\) covered' }
it { is_expected.to be_nil }
end
context 'multiple results in content & regex' do
- subject { build.extract_coverage(' (98.39%) covered. (98.29%) covered', '\(\d+.\d+\%\) covered') }
+ let(:data) { ' (98.39%) covered. (98.29%) covered' }
+ let(:regex) { '\(\d+.\d+\%\) covered' }
it { is_expected.to eq(98.29) }
end
context 'using a regex capture' do
- subject { build.extract_coverage('TOTAL 9926 3489 65%', 'TOTAL\s+\d+\s+\d+\s+(\d{1,3}\%)') }
+ let(:data) { 'TOTAL 9926 3489 65%' }
+ let(:regex) { 'TOTAL\s+\d+\s+\d+\s+(\d{1,3}\%)' }
it { is_expected.to eq(65) }
end
+
+ context 'malicious regexp' do
+ let(:data) { malicious_text }
+ let(:regex) { malicious_regexp }
+
+ include_examples 'malicious regexp'
+ end
end
describe '#first_pending' do
diff --git a/spec/support/malicious_regexp_shared_examples.rb b/spec/support/malicious_regexp_shared_examples.rb
new file mode 100644
index 00000000000..ac5d22298bb
--- /dev/null
+++ b/spec/support/malicious_regexp_shared_examples.rb
@@ -0,0 +1,8 @@
+shared_examples 'malicious regexp' do
+ let(:malicious_text) { 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa!' }
+ let(:malicious_regexp) { '(?i)^(([a-z])+.)+[A-Z]([a-z])+$' }
+
+ it 'takes under a second' do
+ expect { Timeout.timeout(1) { subject } }.not_to raise_error
+ end
+end