lib/banzai/filter/gollum_tags_filter.rb


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185

# frozen_string_literal: true

require 'erb'

module Banzai
  module Filter
    # HTML Filter for parsing Gollum's tags in HTML. It only parses the
    # following tags:
    #
    # - Link to internal pages:
    #
    #   * [[Bug Reports]]
    #   * [[How to Contribute|Contributing]]
    #
    # - Link to external resources:
    #
    #   * [[http://en.wikipedia.org/wiki/Git_(software)]]
    #   * [[Git|http://en.wikipedia.org/wiki/Git_(software)]]
    #
    # - Link internal images, the special attributes will be ignored:
    #
    #   * [[images/logo.png]]
    #   * [[images/logo.png|alt=Logo]]
    #
    # - Link external images, the special attributes will be ignored:
    #
    #   * [[http://example.com/images/logo.png]]
    #   * [[http://example.com/images/logo.png|alt=Logo]]
    #
    # - Insert a Table of Contents list:
    #
    #   * [[_TOC_]]
    #
    # Based on Gollum::Filter::Tags
    #
    # Context options:
    #   :project_wiki (required) - Current project wiki.
    #
    class GollumTagsFilter < HTML::Pipeline::Filter
      include ActionView::Helpers::TagHelper

      # Pattern to match tags content that should be parsed in HTML.
      #
      # Gollum's tags have been made to resemble the tags of other markups,
      # especially MediaWiki. The basic syntax is:
      #
      # [[tag]]
      #
      # Some tags will accept attributes which are separated by pipe
      # symbols. Some attributes must precede the tag and some must follow it:
      #
      # [[prefix-attribute|tag]]
      # [[tag|suffix-attribute]]
      #
      # See https://github.com/gollum/gollum/wiki
      #
      # Rubular: http://rubular.com/r/7dQnE5CUCH
      TAGS_PATTERN = /\[\[(.+?)\]\]/.freeze

      # Pattern to match allowed image extensions
      ALLOWED_IMAGE_EXTENSIONS = /.+(jpg|png|gif|svg|bmp)\z/i.freeze

      # Do not perform linking inside these tags.
      IGNORED_ANCESTOR_TAGS = %w(pre code tt).to_set

      # Prefixes that mark a tag target as an external URL. The scheme
      # separator is required so that wiki pages whose names merely begin with
      # "http" (e.g. `https-setup`) are still treated as internal pages.
      URL_PREFIXES = %w(http:// https://).freeze

      # Walk every text node of the document and expand the Gollum tags found
      # in it. Text nodes nested inside IGNORED_ANCESTOR_TAGS are skipped.
      #
      # Returns the processed document.
      def call
        doc.search(".//text()").each do |node|
          next if has_ancestor?(node, IGNORED_ANCESTOR_TAGS)

          # A Gollum ToC tag is `[[_TOC_]]`, but due to MarkdownFilter running
          # before this one, it will be converted into `[[<em>TOC</em>]]`, so it
          # needs special-case handling
          if toc_tag?(node)
            process_toc_tag(node)
          else
            html = replace_tags(node.content)

            node.replace(html) if html
          end
        end

        doc
      end

      private

      # Expand every Gollum tag in a text node's content while keeping the
      # text around the tags.
      #
      # The result is markup, so everything that is not generated tag HTML is
      # HTML-escaped before being handed back for re-parsing.
      #
      # content - The String content of a text node.
      #
      # Returns the String HTML replacement, or nil if no tag in the content
      # could be converted (in which case the node should be left untouched).
      def replace_tags(content)
        pieces = []
        cursor = 0
        replaced = false

        content.scan(TAGS_PATTERN) do
          # Grab the match data first: it is the only reliable handle on the
          # current match once other methods have been called.
          match = Regexp.last_match
          tag_html = process_tag(match[1])

          pieces << ERB::Util.html_escape(content[cursor...match.begin(0)])

          if tag_html
            pieces << tag_html
            replaced = true
          else
            # Unprocessable tag: keep it verbatim as text.
            pieces << ERB::Util.html_escape(match[0])
          end

          cursor = match.end(0)
        end

        return unless replaced

        pieces << ERB::Util.html_escape(content[cursor..-1])
        pieces.join
      end

      # Replace an entire `[[<em>TOC</em>]]` node with the result generated by
      # TableOfContentsFilter
      def process_toc_tag(node)
        node.parent.parent.replace(result[:toc].presence || '')
      end

      # Process a single tag into its final HTML form.
      #
      # tag - The String tag contents (the stuff inside the double brackets).
      #
      # Returns the String HTML version of the tag, or nil if the tag has no
      # usable parts (e.g. `[[|]]`).
      def process_tag(tag)
        parts = tag.split('|')

        return if parts.empty?

        process_image_tag(parts) || process_page_link_tag(parts)
      end

      # Attempt to process the tag as an image tag.
      #
      # parts - The non-empty Array of Strings obtained by splitting the tag
      #         contents on `|`. Only the first part (the image location) is
      #         used; attributes such as `alt=Logo` are ignored.
      #
      # Returns the String HTML if the tag is a valid image tag or nil
      # if it is not.
      def process_image_tag(parts)
        content = parts[0].strip

        return unless image?(content)

        path =
          if url?(content)
            content
          elsif (file = project_wiki&.find_file(content))
            # `project_wiki` may be nil, hence the safe navigation above.
            wiki_path(file.path)
          end

        content_tag(:img, nil, data: { src: path }, class: 'gfm') if path
      end

      # Check whether a text node is the `TOC` inside `[[<em>TOC</em>]]`.
      def toc_tag?(node)
        node.content == 'TOC' &&
          node.parent.name == 'em' &&
          node.parent.parent.text == '[[TOC]]'
      end

      # Check whether the path ends with one of the allowed image extensions.
      def image?(path)
        path =~ ALLOWED_IMAGE_EXTENSIONS
      end

      # Check whether the path is an absolute http(s) URL.
      def url?(path)
        path.start_with?(*URL_PREFIXES)
      end

      # Attempt to process the tag as a page link tag.
      #
      # parts - The non-empty Array of Strings obtained by splitting the tag
      #         contents on `|`. A single part is used as both link text and
      #         target; otherwise the first part is the link text and the
      #         second is the target.
      #
      # Returns the String HTML of the link.
      def process_page_link_tag(parts)
        if parts.size == 1
          reference = parts[0].strip
        else
          name, reference = parts.first(2).map(&:strip)
        end

        href = url?(reference) ? reference : wiki_path(reference)

        content_tag(:a, name || reference, href: href, class: 'gfm')
      end

      # Build a path below the wiki's base path.
      #
      # relative_path - The String path relative to the wiki root.
      #
      # Returns the joined String path, or relative_path unchanged when no
      # wiki base path is available (the :project_wiki value may be nil).
      def wiki_path(relative_path)
        base_path = project_wiki_base_path

        base_path ? ::File.join(base_path, relative_path) : relative_path
      end

      def project_wiki
        context[:project_wiki]
      end

      def project_wiki_base_path
        project_wiki&.wiki_base_path
      end

      # Ensure that a :project_wiki key exists in context
      #
      # Note that while the key might exist, its value could be nil!
      def validate
        needs :project_wiki
      end
    end
  end
end