From 34ab6dfa051c29d27353a9f555e713f36c7954a4 Mon Sep 17 00:00:00 2001 From: Brett Walker Date: Fri, 4 Jan 2019 20:54:02 -0600 Subject: Properly process footnotes in markdown All the ids and classes were stripped. Add them back in and make ids unique --- lib/banzai/filter/footnote_filter.rb | 55 ++++++++++++++++++++++++++++++++ lib/banzai/filter/sanitization_filter.rb | 27 ++++++++++++++-- lib/banzai/pipeline/gfm_pipeline.rb | 1 + 3 files changed, 81 insertions(+), 2 deletions(-) create mode 100644 lib/banzai/filter/footnote_filter.rb (limited to 'lib') diff --git a/lib/banzai/filter/footnote_filter.rb b/lib/banzai/filter/footnote_filter.rb new file mode 100644 index 00000000000..a7120fbd46e --- /dev/null +++ b/lib/banzai/filter/footnote_filter.rb @@ -0,0 +1,55 @@ +# frozen_string_literal: true + +module Banzai + module Filter + # HTML Filter for footnotes + # + # Footnotes are supported in CommonMark. However we were stripping + # the ids during sanitization. Those are now allowed. + # + # Footnotes are numbered the same - the first one has `id=fn1`, the + # second is `id=fn2`, etc. In order to allow footnotes when rendering + # multiple markdown blocks on a page, we need to make each footnote + # reference unique. + # + # This filter adds a random number to each footnote (the same number + # can be used for a single render). So you get `id=fn1-4335` and `id=fn2-4335`. + # + class FootnoteFilter < HTML::Pipeline::Filter + INTEGER_PATTERN = /\A\d+\Z/.freeze + + def call + return doc unless first_footnote = doc.at_css('ol > li[id=fn1]') + + # Sanitization stripped off the section wrapper - add it back in + first_footnote.parent.wrap('
') + + doc.css('sup > a[id]').each do |link_node| + ref_num = link_node[:id].delete_prefix('fnref') + footnote_node = doc.at_css("li[id=fn#{ref_num}]") + backref_node = doc.at_css("li[id=fn#{ref_num}] a[href=\"#fnref#{ref_num}\"]") + + if ref_num =~ INTEGER_PATTERN && footnote_node && backref_node + rand_ref_num = "#{ref_num}-#{random_number}" + link_node[:href] = "#fn#{rand_ref_num}" + link_node[:id] = "fnref#{rand_ref_num}" + footnote_node[:id] = "fn#{rand_ref_num}" + backref_node[:href] = "#fnref#{rand_ref_num}" + + # Sanitization stripped off class - add it back in + link_node.parent.append_class('footnote-ref') + backref_node.append_class('footnote-backref') + end + end + + doc + end + + private + + def random_number + @random_number ||= rand(10000) + end + end + end +end diff --git a/lib/banzai/filter/sanitization_filter.rb b/lib/banzai/filter/sanitization_filter.rb index 8ba09290e6d..d05518edcea 100644 --- a/lib/banzai/filter/sanitization_filter.rb +++ b/lib/banzai/filter/sanitization_filter.rb @@ -8,8 +8,10 @@ module Banzai class SanitizationFilter < HTML::Pipeline::SanitizationFilter include Gitlab::Utils::StrongMemoize - UNSAFE_PROTOCOLS = %w(data javascript vbscript).freeze - TABLE_ALIGNMENT_PATTERN = /text-align: (?center|left|right)/ + UNSAFE_PROTOCOLS = %w(data javascript vbscript).freeze + TABLE_ALIGNMENT_PATTERN = /text-align: (?center|left|right)/.freeze + FOOTNOTE_LINK_REFERENCE_PATTERN = /\Afnref\d\z/.freeze + FOOTNOTE_LI_REFERENCE_PATTERN = /\Afn\d\z/.freeze def whitelist strong_memoize(:whitelist) do @@ -57,6 +59,13 @@ module Banzai # Remove any `style` properties not required for table alignment whitelist[:transformers].push(self.class.remove_unsafe_table_style) + # Allow `id` in a and li elements for footnotes + whitelist[:attributes]['a'].push('id') + whitelist[:attributes]['li'] = %w(id) + + # ...but remove any `id` properties not matching for footnotes + whitelist[:transformers].push(self.class.remove_non_footnote_ids) + whitelist end @@ -112,6 +121,20 @@ module Banzai end end end + + def remove_non_footnote_ids + lambda do |env| + node = env[:node] + + return unless node.name == 'a' || node.name == 'li' + return unless node.has_attribute?('id') + + return if node.name == 'a' && node['id'] =~ FOOTNOTE_LINK_REFERENCE_PATTERN + return if node.name == 'li' && node['id'] =~ FOOTNOTE_LI_REFERENCE_PATTERN + + node.remove_attribute('id') + end + end end end end diff --git a/lib/banzai/pipeline/gfm_pipeline.rb b/lib/banzai/pipeline/gfm_pipeline.rb index 5f13a6d6cde..d860dad0b6c 100644 --- a/lib/banzai/pipeline/gfm_pipeline.rb +++ b/lib/banzai/pipeline/gfm_pipeline.rb @@ -30,6 +30,7 @@ module Banzai Filter::AutolinkFilter, Filter::ExternalLinkFilter, Filter::SuggestionFilter, + Filter::FootnoteFilter, *reference_filters, -- cgit v1.2.1 From cc036417278111efc7e8e686339ab0191a324364 Mon Sep 17 00:00:00 2001 From: Brett Walker Date: Thu, 10 Jan 2019 17:28:44 -0600 Subject: Updates based on review comments --- lib/banzai/filter/footnote_filter.rb | 14 +++++++------- lib/banzai/filter/sanitization_filter.rb | 4 ++-- 2 files changed, 9 insertions(+), 9 deletions(-) (limited to 'lib') diff --git a/lib/banzai/filter/footnote_filter.rb b/lib/banzai/filter/footnote_filter.rb index a7120fbd46e..dc00ca73147 100644 --- a/lib/banzai/filter/footnote_filter.rb +++ b/lib/banzai/filter/footnote_filter.rb @@ -16,25 +16,25 @@ module Banzai # can be used for a single render). So you get `id=fn1-4335` and `id=fn2-4335`. # class FootnoteFilter < HTML::Pipeline::Filter - INTEGER_PATTERN = /\A\d+\Z/.freeze + INTEGER_PATTERN = /\A\d+\z/.freeze def call return doc unless first_footnote = doc.at_css('ol > li[id=fn1]') # Sanitization stripped off the section wrapper - add it back in first_footnote.parent.wrap('
') + rand_suffix = "-#{random_number}" doc.css('sup > a[id]').each do |link_node| ref_num = link_node[:id].delete_prefix('fnref') footnote_node = doc.at_css("li[id=fn#{ref_num}]") - backref_node = doc.at_css("li[id=fn#{ref_num}] a[href=\"#fnref#{ref_num}\"]") + backref_node = footnote_node.at_css("a[href=\"#fnref#{ref_num}\"]") if ref_num =~ INTEGER_PATTERN && footnote_node && backref_node - rand_ref_num = "#{ref_num}-#{random_number}" - link_node[:href] = "#fn#{rand_ref_num}" - link_node[:id] = "fnref#{rand_ref_num}" - footnote_node[:id] = "fn#{rand_ref_num}" - backref_node[:href] = "#fnref#{rand_ref_num}" + link_node[:href] += rand_suffix + link_node[:id] += rand_suffix + footnote_node[:id] += rand_suffix + backref_node[:href] += rand_suffix # Sanitization stripped off class - add it back in link_node.parent.append_class('footnote-ref') diff --git a/lib/banzai/filter/sanitization_filter.rb b/lib/banzai/filter/sanitization_filter.rb index d05518edcea..16accefa850 100644 --- a/lib/banzai/filter/sanitization_filter.rb +++ b/lib/banzai/filter/sanitization_filter.rb @@ -10,8 +10,8 @@ module Banzai UNSAFE_PROTOCOLS = %w(data javascript vbscript).freeze TABLE_ALIGNMENT_PATTERN = /text-align: (?center|left|right)/.freeze - FOOTNOTE_LINK_REFERENCE_PATTERN = /\Afnref\d\z/.freeze - FOOTNOTE_LI_REFERENCE_PATTERN = /\Afn\d\z/.freeze + FOOTNOTE_LINK_REFERENCE_PATTERN = /\Afnref\d+\z/.freeze + FOOTNOTE_LI_REFERENCE_PATTERN = /\Afn\d+\z/.freeze def whitelist strong_memoize(:whitelist) do -- cgit v1.2.1 From 7e900ed8569aff9fc549e95b6271c3d578acbd51 Mon Sep 17 00:00:00 2001 From: Brett Walker Date: Wed, 16 Jan 2019 21:13:44 -0600 Subject: Refactoring and addressing review comments and additional spec --- lib/banzai/filter/footnote_filter.rb | 23 ++++++++++++++++++----- lib/banzai/filter/sanitization_filter.rb | 18 +++++++----------- 2 files changed, 25 insertions(+), 16 deletions(-) (limited to 'lib') diff --git a/lib/banzai/filter/footnote_filter.rb b/lib/banzai/filter/footnote_filter.rb index dc00ca73147..97527976437 100644 --- a/lib/banzai/filter/footnote_filter.rb +++ b/lib/banzai/filter/footnote_filter.rb @@ -16,19 +16,24 @@ module Banzai # can be used for a single render). So you get `id=fn1-4335` and `id=fn2-4335`. # class FootnoteFilter < HTML::Pipeline::Filter - INTEGER_PATTERN = /\A\d+\z/.freeze + INTEGER_PATTERN = /\A\d+\z/.freeze + FOOTNOTE_ID_PREFIX = 'fn'.freeze + FOOTNOTE_LINK_ID_PREFIX = 'fnref'.freeze + FOOTNOTE_LI_REFERENCE_PATTERN = /\A#{FOOTNOTE_ID_PREFIX}\d+\z/.freeze + FOOTNOTE_LINK_REFERENCE_PATTERN = /\A#{FOOTNOTE_LINK_ID_PREFIX}\d+\z/.freeze + FOOTNOTE_START_NUMBER = 1 def call - return doc unless first_footnote = doc.at_css('ol > li[id=fn1]') + return doc unless first_footnote = doc.at_css("ol > li[id=#{fn_id(FOOTNOTE_START_NUMBER)}]") # Sanitization stripped off the section wrapper - add it back in first_footnote.parent.wrap('
') rand_suffix = "-#{random_number}" doc.css('sup > a[id]').each do |link_node| - ref_num = link_node[:id].delete_prefix('fnref') - footnote_node = doc.at_css("li[id=fn#{ref_num}]") - backref_node = footnote_node.at_css("a[href=\"#fnref#{ref_num}\"]") + ref_num = link_node[:id].delete_prefix(FOOTNOTE_LINK_ID_PREFIX) + footnote_node = doc.at_css("li[id=#{fn_id(ref_num)}]") + backref_node = footnote_node.at_css("a[href=\"##{fnref_id(ref_num)}\"]") if ref_num =~ INTEGER_PATTERN && footnote_node && backref_node link_node[:href] += rand_suffix @@ -50,6 +55,14 @@ module Banzai def random_number @random_number ||= rand(10000) end + + def fn_id(num) + "#{FOOTNOTE_ID_PREFIX}#{num}" + end + + def fnref_id(num) + "#{FOOTNOTE_LINK_ID_PREFIX}#{num}" + end end end end diff --git a/lib/banzai/filter/sanitization_filter.rb b/lib/banzai/filter/sanitization_filter.rb index 16accefa850..edc053638a8 100644 --- a/lib/banzai/filter/sanitization_filter.rb +++ b/lib/banzai/filter/sanitization_filter.rb @@ -8,10 +8,8 @@ module Banzai class SanitizationFilter < HTML::Pipeline::SanitizationFilter include Gitlab::Utils::StrongMemoize - UNSAFE_PROTOCOLS = %w(data javascript vbscript).freeze - TABLE_ALIGNMENT_PATTERN = /text-align: (?center|left|right)/.freeze - FOOTNOTE_LINK_REFERENCE_PATTERN = /\Afnref\d+\z/.freeze - FOOTNOTE_LI_REFERENCE_PATTERN = /\Afn\d+\z/.freeze + UNSAFE_PROTOCOLS = %w(data javascript vbscript).freeze + TABLE_ALIGNMENT_PATTERN = /text-align: (?center|left|right)/.freeze def whitelist strong_memoize(:whitelist) do @@ -47,10 +45,9 @@ module Banzai whitelist[:attributes][:all].delete('name') whitelist[:attributes]['a'].push('name') - # Allow any protocol in `a` elements... + # Allow any protocol in `a` elements + # and then remove links with unsafe protocols whitelist[:protocols].delete('a') - - # ...but then remove links with unsafe protocols whitelist[:transformers].push(self.class.remove_unsafe_links) # Remove `rel` attribute from `a` elements @@ -60,10 +57,9 @@ module Banzai whitelist[:transformers].push(self.class.remove_unsafe_table_style) # Allow `id` in a and li elements for footnotes + # and remove any `id` properties not matching for footnotes whitelist[:attributes]['a'].push('id') whitelist[:attributes]['li'] = %w(id) - - # ...but remove any `id` properties not matching for footnotes whitelist[:transformers].push(self.class.remove_non_footnote_ids) whitelist @@ -129,8 +125,8 @@ module Banzai return unless node.name == 'a' || node.name == 'li' return unless node.has_attribute?('id') - return if node.name == 'a' && node['id'] =~ FOOTNOTE_LINK_REFERENCE_PATTERN - return if node.name == 'li' && node['id'] =~ FOOTNOTE_LI_REFERENCE_PATTERN + return if node.name == 'a' && node['id'] =~ Banzai::Filter::FootnoteFilter::FOOTNOTE_LINK_REFERENCE_PATTERN + return if node.name == 'li' && node['id'] =~ Banzai::Filter::FootnoteFilter::FOOTNOTE_LI_REFERENCE_PATTERN node.remove_attribute('id') end -- cgit v1.2.1