diff options
Diffstat (limited to 'lib/gitlab/untrusted_regexp.rb')
-rw-r--r-- | lib/gitlab/untrusted_regexp.rb | 73 |
1 files changed, 73 insertions, 0 deletions
diff --git a/lib/gitlab/untrusted_regexp.rb b/lib/gitlab/untrusted_regexp.rb new file mode 100644 index 00000000000..187a9e1145f --- /dev/null +++ b/lib/gitlab/untrusted_regexp.rb @@ -0,0 +1,73 @@ +module Gitlab + # An untrusted regular expression is any regexp containing patterns sourced + # from user input. + # + # Ruby's built-in regular expression library allows patterns which complete in + # exponential time, permitting denial-of-service attacks. + # + # Not all regular expression features are available in untrusted regexes, and + # there is a strict limit on total execution time. See the RE2 documentation + # at https://github.com/google/re2/wiki/Syntax for more details. + class UntrustedRegexp + delegate :===, to: :regexp + + def initialize(pattern) + @regexp = RE2::Regexp.new(pattern, log_errors: false) + + raise RegexpError.new(regexp.error) unless regexp.ok? + end + + def replace_all(text, rewrite) + RE2.GlobalReplace(text, regexp, rewrite) + end + + def scan(text) + text = text.dup # modified in-place + results = [] + + loop do + match = scan_regexp.match(text) + break unless match + + # Ruby scan returns empty strings, not nil + groups = match.to_a.map(&:to_s) + + results << + if regexp.number_of_capturing_groups.zero? + groups[0] + else + groups[1..-1] + end + + matchsize = match.end(0) + + # No further matches + break unless matchsize.present? + + text.slice!(0, matchsize) + break unless text.present? + end + + results + end + + def replace(text, rewrite) + RE2.Replace(text, regexp, rewrite) + end + + private + + attr_reader :regexp + + # RE2 scan operates differently to Ruby scan when there are no capture + # groups, so work around it + def scan_regexp + @scan_regexp ||= + if regexp.number_of_capturing_groups.zero? + RE2::Regexp.new('(' + regexp.source + ')') + else + regexp + end + end + end +end |