summaryrefslogtreecommitdiff
path: root/lib/gitlab/changelog/template/compiler.rb
blob: fa7724aa2dacbbe81d3404ce8d6285cdfc36066b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
# frozen_string_literal: true

module Gitlab
  module Changelog
    module Template
      # Compiler is used for turning a minimal user templating language into an
      # ERB template, without giving the user access to run arbitrary code.
      #
      # The template syntax is deliberately made as minimal as possible, and
      # only supports the following:
      #
      # * Printing a value
      # * Iterating over collections
      # * if/else
      #
      # The syntax looks as follows:
      #
      #     {% each users %}
      #
      #     Name: {{user}}
      #     Likes cats: {% if likes_cats %}yes{% else %}no{% end %}
      #
      #     {% end %}
      #
      # Newlines can be escaped by ending a line with a backslash. So this:
      #
      #     foo \
      #     bar
      #
      # Is the same as this:
      #
      #     foo bar
      #
      # Templates are compiled into ERB templates, while taking care to make
      # sure the user can't run arbitrary code. By using ERB we can let it do
      # the heavy lifting of rendering data; all we need to provide is a
      # translation layer.
      #
      # # Security
      #
      # The template syntax this compiler exposes is safe to be used by
      # untrusted users. Not only are they unable to run arbitrary code, the
      # compiler also enforces a limit on the integer sizes and the number of
      # nested loops. ERB tags added by the user are also disabled.
      class Compiler
        # A pattern to match a single integer, with an upper size limit.
        #
        # We enforce a limit of 10 digits (= a 32 bits integer) so users can't
        # trigger the allocation of infinitely large bignums, or trigger
        # RangeError errors when using such integers to access an array value.
        #
        # The pattern is anchored using \A and \z (instead of ^ and $) so it
        # always has to match the entire string, never just one line of it.
        INTEGER = /\A\d{1,10}\z/.freeze

        # The name/path of a variable, such as `user.address.city`.
        #
        # It's important that this regular expression _doesn't_ allow for
        # anything but letters, numbers, and underscores, otherwise a user may
        # use those to "escape" our template and run arbitrary Ruby code. For
        # example, take this variable:
        #
        #     {{') ::Kernel.exit #'}}
        #
        # This would then be compiled into:
        #
        #     <%= read(variables, '') ::Kernel.exit #'') %>
        #
        # Restricting the allowed characters makes this impossible.
        VAR_NAME = /([\w\.]+)/.freeze

        # A variable tag, such as `{{username}}`.
        VAR = /{{ \s* #{VAR_NAME} \s* }}/x.freeze

        # The opening tag for a statement.
        STM_START = /{% \s*/x.freeze

        # The closing tag for a statement.
        STM_END = /\s* %}/x.freeze

        # A regular `end` closing tag.
        NORMAL_END = /#{STM_START} end #{STM_END}/x.freeze

        # An `end` closing tag on its own line, without any non-whitespace
        # preceding or following it.
        #
        # These tags need some special care to make it easier to control
        # whitespace.
        LONELY_END = /^\s*#{NORMAL_END}\s$/x.freeze

        # An `else` tag.
        ELSE = /#{STM_START} else #{STM_END}/x.freeze

        # The start of an `each` tag.
        EACH = /#{STM_START} each \s+ #{VAR_NAME} #{STM_END}/x.freeze

        # The start of an `if` tag.
        IF = /#{STM_START} if \s+ #{VAR_NAME} #{STM_END}/x.freeze

        # The pattern to use for escaping newlines.
        ESCAPED_NEWLINE = /\\\n$/.freeze

        # The start tag for ERB tags. These tags will be escaped, preventing
        # users from using ERB directly.
        ERB_START_TAG = /<\\?\s*\\?\s*%/.freeze

        # Compiles a user supplied template into an ERB template.
        #
        # template - The source of the user template, as a String. The String
        #            itself is not modified.
        #
        # Returns a `Gitlab::Changelog::Template::Template` that is created
        # from the generated ERB source.
        def compile(template)
          transformed_lines = ['<% it = variables %>']

          # ERB tags must be stripped here, otherwise a user may introduce ERB
          # tags by making clever use of whitespace. See
          # https://gitlab.com/gitlab-org/gitlab/-/issues/300224 for more
          # information.
          template = template.gsub(ERB_START_TAG, '<%%')

          template.each_line { |line| transformed_lines << transform(line) }

          # We use the full namespace here as otherwise Rails may use the wrong
          # constant when autoloading is used.
          ::Gitlab::Changelog::Template::Template.new(transformed_lines.join)
        end

        # Translates a single template line into its ERB counterpart.
        #
        # line - The line to translate, as a String. Strings that aren't
        #        frozen are modified in place; frozen Strings are copied
        #        first.
        #
        # Returns the translated line as a String.
        def transform(line)
          # `+line` returns `line` itself unless it's frozen, in which case we
          # get an unfrozen copy. This keeps the in-place replacements below
          # from raising a FrozenError for frozen input such as String
          # literals.
          line = +line

          line.gsub!(ESCAPED_NEWLINE, '')

          # This replacement ensures that "end" blocks on their own lines
          # don't add extra newlines. Using an ERB -%> tag sadly swallows too
          # many newlines.
          line.gsub!(LONELY_END, '<% end %>')
          line.gsub!(NORMAL_END, '<% end %>')
          line.gsub!(ELSE, '<% else -%>')

          line.gsub!(EACH) do
            # No, `it; variables` isn't a syntax error. Using `;` marks
            # `variables` as block-local, making it possible to re-assign it
            # without affecting outer definitions of this variable. We use
            # this to scope template variables to the right input Hash.
            "<% each(#{read_path(Regexp.last_match(1))}) do |it; variables| -%><% variables = it -%>"
          end

          line.gsub!(IF) { "<% if truthy?(#{read_path(Regexp.last_match(1))}) -%>" }
          line.gsub!(VAR) { "<%= #{read_path(Regexp.last_match(1))} %>" }
          line
        end

        # Generates the Ruby expression used for reading a variable path.
        #
        # path - The variable path as a String, such as `user.address.city`.
        #        The path is expected to have been matched by VAR_NAME first;
        #        this method doesn't perform any escaping itself.
        #
        # The special path `it` is returned as-is. For all other paths the
        # segments are passed as separate arguments to a `read` call in the
        # generated template: segments matching INTEGER are passed as integer
        # literals, all other segments as single quoted Strings.
        #
        # Returns the generated Ruby source as a String.
        def read_path(path)
          return path if path == 'it'

          args = path.split('.').map do |arg|
            # Integers are normalised to their canonical decimal form. If we
            # were to emit them as-is, a leading zero would make Ruby treat
            # the literal as octal: `010` would read index 8 instead of 10,
            # and `08` would produce a SyntaxError in the generated template.
            arg.match?(INTEGER) ? arg.to_i.to_s : "'#{arg}'"
          end

          "read(variables, #{args.join(', ')})"
        end
      end
    end
  end
end