summaryrefslogtreecommitdiff
path: root/lib/gitlab/untrusted_regexp.rb
diff options
context:
space:
mode:
Diffstat (limited to 'lib/gitlab/untrusted_regexp.rb')
-rw-r--r--lib/gitlab/untrusted_regexp.rb73
1 files changed, 73 insertions, 0 deletions
diff --git a/lib/gitlab/untrusted_regexp.rb b/lib/gitlab/untrusted_regexp.rb
new file mode 100644
index 00000000000..187a9e1145f
--- /dev/null
+++ b/lib/gitlab/untrusted_regexp.rb
@@ -0,0 +1,73 @@
module Gitlab
  # An untrusted regular expression is any regexp containing patterns sourced
  # from user input.
  #
  # Ruby's built-in regular expression library allows patterns which complete in
  # exponential time, permitting denial-of-service attacks. This class compiles
  # and evaluates patterns with RE2 instead.
  #
  # Not all regular expression features are available in untrusted regexes, and
  # there is a strict limit on total execution time. See the RE2 documentation
  # at https://github.com/google/re2/wiki/Syntax for more details.
  class UntrustedRegexp
    delegate :===, to: :regexp

    # Compiles +pattern+ with RE2 (RE2's own error logging is switched off).
    #
    # Raises RegexpError, carrying RE2's error message, when the compiled
    # regexp reports that it is not ok.
    def initialize(pattern)
      @regexp = RE2::Regexp.new(pattern, log_errors: false)

      raise RegexpError, regexp.error unless regexp.ok?
    end

    # Hands +text+, the compiled regexp and +rewrite+ to RE2.GlobalReplace and
    # returns whatever it returns.
    # NOTE(review): +rewrite+ presumably uses RE2's rewrite syntax rather than
    # Ruby's - confirm against the re2 gem documentation.
    def replace_all(text, rewrite)
      RE2.GlobalReplace(text, regexp, rewrite)
    end

    # Collects successive matches of the pattern in +text+.
    #
    # Returns an Array with one entry per match: the whole matched String when
    # the pattern has no capture groups, otherwise an Array holding one String
    # per capture group. Groups that did not take part in a match are reported
    # as empty strings, never nil.
    #
    # The caller's +text+ is never modified; a private copy is consumed.
    def scan(text)
      text = text.dup # consumed in place below
      whole_match_only = regexp.number_of_capturing_groups.zero?
      results = []

      loop do
        match = scan_regexp.match(text)
        break unless match

        # Normalise nil (unmatched) groups to empty strings
        groups = match.to_a.map(&:to_s)
        results << (whole_match_only ? groups.first : groups.drop(1))

        consumed = match.end(0)

        # No further matches
        break unless consumed

        # A zero-length match at the start of the remaining text consumes
        # nothing, so always drop at least one character. Without this the
        # loop would spin forever on patterns such as `x*`.
        text.slice!(0, [consumed, 1].max)

        # Only stop once the text is truly exhausted; a whitespace-only
        # remainder can still contain matches.
        break if text.empty?
      end

      results
    end

    # Hands +text+, the compiled regexp and +rewrite+ to RE2.Replace and
    # returns whatever it returns.
    # NOTE(review): whether RE2.Replace mutates +text+ in place depends on the
    # re2 gem version - confirm before relying on either behaviour.
    def replace(text, rewrite)
      RE2.Replace(text, regexp, rewrite)
    end

    private

    attr_reader :regexp

    # RE2 scan operates differently to Ruby scan when there are no capture
    # groups, so work around it: a pattern without groups is wrapped in a
    # single group so the whole match is always available as a capture.
    # The result is memoized.
    def scan_regexp
      @scan_regexp ||=
        if regexp.number_of_capturing_groups.zero?
          RE2::Regexp.new("(#{regexp.source})", log_errors: false)
        else
          regexp
        end
    end
  end
end