summaryrefslogtreecommitdiff
path: root/lib/gitlab/untrusted_regexp.rb
blob: 187a9e1145fb1636f80bbc1cce903dc4a61d1d10 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
module Gitlab
  # An untrusted regular expression is any regexp containing patterns sourced
  # from user input.
  #
  # Ruby's built-in regular expression library allows patterns which complete in
  # exponential time, permitting denial-of-service attacks. This class compiles
  # and runs such patterns with RE2 instead.
  #
  # Not all regular expression features are available in untrusted regexes, and
  # there is a strict limit on total execution time. See the RE2 documentation
  # at https://github.com/google/re2/wiki/Syntax for more details.
  class UntrustedRegexp
    delegate :===, to: :regexp

    # Compiles +pattern+ with RE2, with RE2's own error logging switched off.
    #
    # Raises RegexpError, carrying RE2's error message, when the compiled
    # pattern does not report itself as ok.
    def initialize(pattern)
      @regexp = RE2::Regexp.new(pattern, log_errors: false)

      raise RegexpError, regexp.error unless regexp.ok?
    end

    # Passes +text+, the compiled pattern and +rewrite+ to RE2.GlobalReplace
    # and returns whatever that call returns.
    def replace_all(text, rewrite)
      RE2.GlobalReplace(text, regexp, rewrite)
    end

    # Collects successive matches of the pattern in +text+, in the style of
    # String#scan: an Array of matched strings when the pattern has no capture
    # groups, otherwise an Array holding one Array of capture group strings
    # per match. Groups that did not participate are reported as "".
    #
    # The caller's string is left alone; a copy is consumed instead. Each
    # search restarts on whatever follows the previous match, and scanning
    # stops as soon as that remainder is empty.
    def scan(text)
      text = text.dup # modified in-place
      results = []

      loop do
        match = scan_regexp.match(text)
        break unless match

        # Ruby scan returns empty strings, not nil
        groups = match.to_a.map(&:to_s)

        results <<
          if regexp.number_of_capturing_groups.zero?
            groups[0]
          else
            groups[1..-1]
          end

        matchsize = match.end(0)

        # No further matches
        break unless matchsize

        # A zero-length match at offset 0 consumes nothing. Drop one character
        # regardless, otherwise the same empty match is found forever.
        text.slice!(0, [matchsize, 1].max)

        # Test for emptiness rather than blankness: whitespace-only text is
        # blank but can still contain matches (e.g. for `\s`).
        break if text.empty?
      end

      results
    end

    # Passes +text+, the compiled pattern and +rewrite+ to RE2.Replace and
    # returns whatever that call returns.
    def replace(text, rewrite)
      RE2.Replace(text, regexp, rewrite)
    end

    private

    attr_reader :regexp

    # RE2 scan operates differently to Ruby scan when there are no capture
    # groups, so work around it: a pattern without groups is wrapped in a
    # single group, built once and memoized. Patterns that already have
    # groups are used as they are.
    def scan_regexp
      @scan_regexp ||=
        if regexp.number_of_capturing_groups.zero?
          # Same logging option as the constructor, for consistency.
          RE2::Regexp.new("(#{regexp.source})", log_errors: false)
        else
          regexp
        end
    end
  end
end