diff options
Diffstat (limited to 'lib/banzai/filter/markdown_post_escape_filter.rb')
-rw-r--r-- | lib/banzai/filter/markdown_post_escape_filter.rb | 76 |
1 files changed, 60 insertions, 16 deletions
module Banzai
  module Filter
    # See comments in MarkdownPreEscapeFilter for details on strategy
    #
    # When `result[:escaped_literals]` is set, this filter rewrites the
    # LITERAL_KEYWORD-delimited sequences found in the document's HTML,
    # re-parses the result, and then cleans up any `<span>` wrappers that
    # ended up in link/`pre` attributes or inside `<code>` elements.
    class MarkdownPostEscapeFilter < HTML::Pipeline::Filter
      LITERAL_KEYWORD = MarkdownPreEscapeFilter::LITERAL_KEYWORD
      LITERAL_REGEX = %r{#{LITERAL_KEYWORD}-(.*?)-#{LITERAL_KEYWORD}}.freeze
      NOT_LITERAL_REGEX = %r{#{LITERAL_KEYWORD}-((%5C|\\).+?)-#{LITERAL_KEYWORD}}.freeze
      SPAN_REGEX = %r{<span>(.*?)</span>}.freeze

      XPATH_A = Gitlab::Utils::Nokogiri.css_to_xpath('a').freeze
      XPATH_LANG_TAG = Gitlab::Utils::Nokogiri.css_to_xpath('pre').freeze
      XPATH_CODE_SPAN = Gitlab::Utils::Nokogiri.css_to_xpath('code > span').freeze

      # Entry point of the filter.
      #
      # Returns `doc` untouched unless `result[:escaped_literals]` is truthy.
      # Otherwise the document is serialized, rewritten as a string, re-parsed
      # into `@doc`, and post-processed node by node.
      #
      # @return the (possibly re-parsed) document
      def call
        return doc unless result[:escaped_literals]

        new_html = unescaped_literals(doc.to_html)
        new_html = add_spans(new_html)

        @doc = parse_html(new_html)

        remove_spans_in_certain_attributes
        remove_spans_in_code

        doc
      end

      private

      # For any literals that actually didn't get escape processed
      # (for example in code blocks), remove the special sequence.
      #
      # Each match is looked up in ESCAPABLE_CHARS by its `:token`; when found,
      # the item's `:escaped` value is emitted, otherwise the captured text is
      # emitted unchanged. A capture that arrived with a `%5C` prefix gets the
      # backslash of its replacement turned back into `%5C`.
      #
      # @param html [String] serialized document
      # @return [String] a new string; the argument is not modified
      def unescaped_literals(html)
        html.gsub(NOT_LITERAL_REGEX) do
          captured = ::Regexp.last_match(1)
          token = captured.sub('%5C', '\\')

          escaped_item = MarkdownPreEscapeFilter::ESCAPABLE_CHARS.find { |item| item[:token] == token }
          replacement = escaped_item ? escaped_item[:escaped] : captured

          replacement = replacement.sub('\\', '%5C') if captured.start_with?('%5C')

          replacement
        end
      end

      # Replace any left over literal sequences with `span` so that our
      # reference processing is short-circuited
      #
      # The captured text is prefixed with a backslash and looked up in
      # ESCAPABLE_CHARS by `:token`; the item's `:char` is wrapped when found,
      # otherwise the captured text itself is wrapped.
      #
      # @param html [String] serialized document
      # @return [String] a new string; the argument is not modified
      def add_spans(html)
        html.gsub(LITERAL_REGEX) do
          captured = ::Regexp.last_match(1)
          token = "\\#{captured}"

          escaped_item = MarkdownPreEscapeFilter::ESCAPABLE_CHARS.find { |item| item[:token] == token }
          char = escaped_item ? escaped_item[:char] : captured

          "<span>#{char}</span>"
        end
      end

      # Since literals are converted in links, we need to remove any surrounding `span`.
      #
      # Applies to `href` and `title` on nodes matched by XPATH_A and to `lang`
      # on nodes matched by XPATH_LANG_TAG.
      def remove_spans_in_certain_attributes
        doc.xpath(XPATH_A).each do |node|
          strip_spans_from_attribute(node, 'href')
          strip_spans_from_attribute(node, 'title')
        end

        doc.xpath(XPATH_LANG_TAG).each do |node|
          strip_spans_from_attribute(node, 'lang')
        end
      end

      # Unwraps every `<span>…</span>` in the named attribute's value, keeping
      # the inner text. Does nothing when the node lacks that attribute.
      #
      # @param node the element whose attribute is rewritten
      # @param name [String] attribute name
      def strip_spans_from_attribute(node, name)
        attribute = node.attributes[name]
        return unless attribute

        attribute.value = attribute.value.gsub(SPAN_REGEX, '\1')
      end

      # Any `<span>` that makes it into a `<code>` element is from the math processing,
      # convert back to the escaped character, such as `\$`
      #
      # Only spans whose content equals the `:char` of an ESCAPABLE_CHARS item
      # with a truthy `:latex` entry are replaced; all others are left alone.
      def remove_spans_in_code
        doc.xpath(XPATH_CODE_SPAN).each do |node|
          content = node.content
          escaped_item = MarkdownPreEscapeFilter::ESCAPABLE_CHARS.find do |item|
            item[:char] == content && item[:latex]
          end

          node.replace(escaped_item[:escaped]) if escaped_item
        end
      end
    end
  end
end