blob: 8c0bd62f80af971ff1c0033173d6ce85ec709095 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
|
# frozen_string_literal: true
module Banzai
module Filter
# See comments in MarkdownPreEscapeFilter for details on strategy
class MarkdownPostEscapeFilter < HTML::Pipeline::Filter
# Marker keyword shared with MarkdownPreEscapeFilter; the patterns below look
# for text wrapped as `<keyword>-...-<keyword>`.
LITERAL_KEYWORD = MarkdownPreEscapeFilter::LITERAL_KEYWORD
# Matches any wrapped marker, lazily capturing the text between the two keywords.
LITERAL_REGEX = %r{#{LITERAL_KEYWORD}-(.*?)-#{LITERAL_KEYWORD}}.freeze
# Matches only wrapped markers whose captured text begins with a backslash or
# with `%5C` (a URL-encoded backslash).
NOT_LITERAL_REGEX = %r{#{LITERAL_KEYWORD}-((%5C|\\).+?)-#{LITERAL_KEYWORD}}.freeze
# Matches an attribute-less `<span>...</span>` pair, capturing its inner text.
SPAN_REGEX = %r{<span>(.*?)</span>}.freeze
# XPath queries precomputed from CSS selectors: `a` elements, `pre` elements,
# and `span` elements that are direct children of `code`.
XPATH_A = Gitlab::Utils::Nokogiri.css_to_xpath('a').freeze
XPATH_LANG_TAG = Gitlab::Utils::Nokogiri.css_to_xpath('pre').freeze
XPATH_CODE_SPAN = Gitlab::Utils::Nokogiri.css_to_xpath('code > span').freeze
# Entry point of the filter. When the pipeline result has no truthy
# `:escaped_literals` entry the document is handed back untouched. Otherwise
# the serialized HTML goes through `unescaped_literals` and `add_spans`, is
# re-parsed into `@doc`, and spans left in link/`pre` attributes or inside
# `code` elements are cleaned up.
#
# Returns `doc`.
def call
  return doc unless result[:escaped_literals]

  rewritten_html = add_spans(unescaped_literals(doc.to_html))
  @doc = parse_html(rewritten_html)

  remove_spans_in_certain_attributes
  remove_spans_in_code

  doc
end
private
# Some literal markers never got escape processed (for example those inside
# code blocks); their captured text still starts with a backslash or with
# `%5C`. Each such marker is replaced by the `:escaped` value of the
# ESCAPABLE_CHARS entry whose `:token` matches, or by the captured text
# itself when no entry matches.
#
# html - HTML String; it is modified in place.
#
# Returns the same String.
def unescaped_literals(html)
  html.gsub!(NOT_LITERAL_REGEX) do
    captured = ::Regexp.last_match(1)
    url_encoded = captured.start_with?('%5C')
    token = captured.sub('%5C', '\\')
    entry = Banzai::Filter::MarkdownPreEscapeFilter::ESCAPABLE_CHARS.find { |candidate| candidate[:token] == token }
    replacement = entry ? entry[:escaped] : captured

    # Keep the backslash URL-encoded when that is how the marker carried it.
    url_encoded ? replacement.sub('\\', '%5C') : replacement
  end

  html
end
# Wraps every remaining literal marker in a `span` so that our reference
# processing is short-circuited. The span holds the `:char` of the
# ESCAPABLE_CHARS entry whose `:token` equals a backslash plus the captured
# text, or the captured text itself when there is no such entry.
#
# html - HTML String; it is modified in place.
#
# Returns the same String.
def add_spans(html)
  html.gsub!(LITERAL_REGEX) do
    captured = ::Regexp.last_match(1)
    token = "\\#{captured}"
    entry = Banzai::Filter::MarkdownPreEscapeFilter::ESCAPABLE_CHARS.find { |candidate| candidate[:token] == token }

    "<span>#{entry ? entry[:char] : captured}</span>"
  end

  html
end
# Since literals are converted in links, we need to remove any surrounding `span`.
# Strips `<span>...</span>` wrappers (keeping the inner text) from the `href`
# and `title` attributes of `a` elements and from the `lang` attribute of
# `pre` elements.
def remove_spans_in_certain_attributes
  strip_spans_from_attributes(doc.xpath(XPATH_A), %w[href title])
  strip_spans_from_attributes(doc.xpath(XPATH_LANG_TAG), %w[lang])
end

# Removes `<span>` wrappers from the values of the named attributes on each
# of the given nodes. Attributes a node does not have are skipped.
#
# nodes           - collection of nodes responding to `attributes`.
# attribute_names - Array of attribute name Strings to clean.
def strip_spans_from_attributes(nodes, attribute_names)
  nodes.each do |node|
    # Read the attribute set once per node instead of once per access.
    attributes = node.attributes

    attribute_names.each do |name|
      attribute = attributes[name]
      next unless attribute

      attribute.value = attribute.value.gsub(SPAN_REGEX, '\1')
    end
  end
end
# Any `<span>` that makes it into a `<code>` element is from the math processing.
# When its content equals the `:char` of an ESCAPABLE_CHARS entry flagged
# `:latex`, the span is replaced with that entry's `:escaped` form, such as `\$`.
# Other spans are left in place.
def remove_spans_in_code
  doc.xpath(XPATH_CODE_SPAN).each do |span|
    text = span.content
    entry = Banzai::Filter::MarkdownPreEscapeFilter::ESCAPABLE_CHARS.find do |candidate|
      candidate[:latex] && candidate[:char] == text
    end
    next unless entry

    span.replace(entry[:escaped])
  end
end
end
end
end
|