From 038d6febedc03280bd686d929057c02d05f2afd6 Mon Sep 17 00:00:00 2001 From: Yorick Peterse Date: Wed, 3 Aug 2016 16:52:08 +0200 Subject: Improve performance of SyntaxHighlightFilter MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit By using Rouge::Lexer.find instead of find_fancy() and memoizing the HTML formatter we can speed up the highlighting process by between 1.7 and 1.8 times (at least when measured using synthetic benchmarks). To measure this I used the following benchmark: require 'benchmark/ips' input = '' Dir['./app/controllers/**/*.rb'].each do |controller| input << <<-EOF
#{File.read(controller).strip}
EOF end document = Nokogiri::HTML.fragment(input) filter = Banzai::Filter::SyntaxHighlightFilter.new(document) puts "Input size: #{(input.bytesize.to_f / 1024).round(2)} KB" Benchmark.ips do |bench| bench.report 'call' do filter.call end end This benchmark produces 250 KB of input. Before these changes the timing output would be as follows: Calculating ------------------------------------- call 1.000 i/100ms ------------------------------------------------- call 22.439 (±35.7%) i/s - 93.000 After these changes the output instead is as follows: Calculating ------------------------------------- call 1.000 i/100ms ------------------------------------------------- call 41.283 (±38.8%) i/s - 148.000 Note that due to the fairly high standard deviation and this being a synthetic benchmark it's entirely possible the real-world improvements are smaller. --- CHANGELOG | 1 + lib/banzai/filter/syntax_highlight_filter.rb | 26 ++++++++++++++++++-------- 2 files changed, 19 insertions(+), 8 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index c099c63ce86..a07b85ca27f 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -15,6 +15,7 @@ v 8.11.0 (unreleased) - Limit git rev-list output count to one in forced push check - Clean up unused routes (Josef Strzibny) - Add green outline to New Branch button. !5447 (winniehell) + - Improve performance of syntax highlighting Markdown code blocks - Update to gitlab_git 10.4.1 and take advantage of preserved Ref objects - Retrieve rendered HTML from cache in one request - Fix renaming repository when name contains invalid chararacters under project settings diff --git a/lib/banzai/filter/syntax_highlight_filter.rb b/lib/banzai/filter/syntax_highlight_filter.rb index 91f0159f9a1..fcdb496aed2 100644 --- a/lib/banzai/filter/syntax_highlight_filter.rb +++ b/lib/banzai/filter/syntax_highlight_filter.rb @@ -17,15 +17,12 @@ module Banzai def highlight_node(node) language = node.attr('class') - code = node.text - + code = node.text css_classes = "code highlight" - - lexer = Rouge::Lexer.find_fancy(language) || Rouge::Lexers::PlainText - formatter = Rouge::Formatters::HTML.new + lexer = lexer_for(language) begin - code = formatter.format(lexer.lex(code)) + code = format(lex(lexer, code)) css_classes << " js-syntax-highlight #{lexer.tag}" rescue @@ -41,14 +38,27 @@ module Banzai private + # Separate method so it can be instrumented. + def lex(lexer, code) + lexer.lex(code) + end + + def format(tokens) + rouge_formatter.format(tokens) + end + + def lexer_for(language) + (Rouge::Lexer.find(language) || Rouge::Lexers::PlainText).new + end + def replace_parent_pre_element(node, highlighted) # Replace the parent `pre` element with the entire highlighted block node.parent.replace(highlighted) end # Override Rouge::Plugins::Redcarpet#rouge_formatter - def rouge_formatter(lexer) - Rouge::Formatters::HTML.new + def rouge_formatter(lexer = nil) + @rouge_formatter ||= Rouge::Formatters::HTML.new end end end -- cgit v1.2.1