summaryrefslogtreecommitdiff
path: root/lib/gitlab/changelog/parser.rb
blob: fac6fc19148f639e511d80896280c7802fdf1f29 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
# frozen_string_literal: true

module Gitlab
  module Changelog
    # Parslet grammar for the template language used to render changelogs.
    #
    # A small template, to give an idea of what the syntax looks like:
    #
    #     {% each users %}
    #     Name: {{name}}
    #     Age: {{age}}
    #
    #     {% if birthday %}
    #     This user is celebrating their birthday today! Yay!
    #     {% end %}
    #     {% end %}
    #
    # The grammar is expressed using Parslet; its documentation lives at
    # http://kschiess.github.io/parslet/.
    class Parser < Parslet::Parser
      root(:exprs)

      # A template is any number of tags, escape sequences, text runs and
      # newlines, in any order.
      rule(:exprs) do
        node = variable | if_expr | each_expr | escaped | text | newline

        node.repeat.as(:exprs)
      end

      # Low-level tokens shared by the rules below.
      rule(:curly_open) do
        str('{')
      end

      rule(:curly_close) do
        str('}')
      end

      rule(:percent) do
        str('%')
      end

      # A single space or tab.
      rule(:space) do
        match('[ \\t]')
      end

      # Zero or more whitespace characters of any kind.
      rule(:whitespace) do
        match('\s').repeat
      end

      rule(:lf) do
        str("\n")
      end

      # A newline that is kept in the output as text.
      rule(:newline) do
        lf.as(:text)
      end

      # A backslash followed by either a newline or a run of regular
      # characters.
      #
      # A backslash directly before a newline is ignored together with that
      # newline, which gives template authors control over the whitespace in
      # the output. A backslash followed by regular characters is captured as
      # text.
      #
      # For example, this:
      #
      #     foo \
      #     bar
      #
      # Is parsed into this:
      #
      #     foo bar
      rule(:escaped) do
        slash = str('\\')
        continuation = (slash >> lf).ignore
        literal = (slash >> chars).as(:text)

        continuation | literal
      end

      # One or more regular characters. Newlines, backslashes and the start
      # of a tag end the run.
      rule(:chars) do
        plain = match("[^{\\\\\n]")

        # A curly brace only counts as text when it doesn't start a tag, so
        # `{foo}` is text while `{{` and `{%` are not consumed here.
        plain_run = plain.repeat(1)
        lone_brace = curly_open >> (curly_open | percent).absent?

        (plain_run | lone_brace).repeat(1)
      end

      rule(:text) do
        chars.as(:text)
      end

      # An integer of one to ten digits.
      #
      # The number of digits is capped to prevent users from creating
      # integers that are too large, as this may result in runtime errors.
      rule(:integer) do
        match('\d').repeat(1, 10).as(:int)
      end

      # An identifier to look up in a data structure.
      #
      # Only ASCII letters and underscores are accepted, as there is no need
      # for more complex identifiers (e.g. those containing multibyte
      # characters).
      rule(:ident) do
        match('[a-zA-Z_]').repeat(1).as(:ident)
      end

      # A selector reads a value and is made up of one or more steps
      # separated by dots. Each step is an identifier or an integer, and
      # whitespace surrounding the selector is allowed.
      #
      # Examples:
      #
      #     name
      #     users.0.name
      #     0
      #     it
      rule(:selector) do
        segment = ident | integer
        path = segment >> (str('.') >> segment).repeat

        whitespace >> path.as(:selector) >> whitespace
      end

      # A variable tag.
      #
      # Examples:
      #
      #     {{name}}
      #     {{users.0.name}}
      #
      # NOTE(review): `repeat(2)` appears to set only a lower bound, meaning
      # additional curly braces would be accepted too. Confirm this is
      # intended.
      rule(:variable) do
        opening = curly_open.repeat(2)
        value = selector.as(:variable)
        closing = curly_close.repeat(2)

        opening >> value >> closing
      end

      # The start of an expression tag (`{%`), including any whitespace that
      # follows it.
      rule(:expr_open) do
        curly_open >> percent >> whitespace
      end

      # The end of an expression tag (`%}`), including any whitespace that
      # precedes it.
      rule(:expr_close) do
        # Markdown is whitespace sensitive, so a newline directly following
        # a %} tag is stripped by default. This is less annoying than
        # requiring users to opt in to this behaviour.
        stripped_newline = lf.maybe.ignore

        whitespace >> percent >> curly_close >> stripped_newline
      end

      rule(:end_tag) do
        expr_open >> str('end') >> expr_close
      end

      # An `if` expression, with an optional `else` clause.
      #
      # Examples:
      #
      #     {% if foo %}
      #     yes
      #     {% end %}
      #
      #     {% if foo %}
      #     yes
      #     {% else %}
      #     no
      #     {% end %}
      rule(:if_expr) do
        else_branch =
          expr_open >> str('else') >> expr_close >> exprs.as(:false_body)

        if_tag =
          expr_open >>
          str('if') >>
          space.repeat(1) >>
          selector.as(:if) >>
          expr_close

        if_tag >> exprs.as(:true_body) >> else_branch.maybe >> end_tag
      end

      # An `each` expression, used for iterating over collections.
      #
      # Example:
      #
      #     {% each users %}
      #     * {{name}}
      #     {% end %}
      rule(:each_expr) do
        each_tag =
          expr_open >>
          str('each') >>
          space.repeat(1) >>
          selector.as(:each) >>
          expr_close

        each_tag >> exprs.as(:body) >> end_tag
      end

      # Parses the given template and passes the resulting tree through
      # `AST::Transformer`, returning whatever the transformer produces.
      #
      # Raises `Error` when the template can't be parsed.
      def parse_and_transform(input)
        transformer = AST::Transformer.new

        transformer.apply(parse(input))
      rescue Parslet::ParseFailed => error
        # Parslet's error is wrapped in our own error class. This makes it
        # easier to catch changelog related errors, and keeps callers from
        # depending on a Parslet specific class.
        raise Error, "Failed to parse the template: #{error.message}"
      end
    end
  end
end