From 17592a6d37cb9f75d41e78affc2d43257dd18d94 Mon Sep 17 00:00:00 2001 From: murphy Date: Wed, 6 Jan 2010 22:41:34 +0000 Subject: Added some benchmark experiments for highlighting without Tokens. --- etc/coderay-lib.tmproj | 83 ++++++++++++++------- etc/speedup/current.rb | 132 ++++++++++++++++++++++++++++++++ etc/speedup/direct-stream.rb | 174 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 364 insertions(+), 25 deletions(-) create mode 100644 etc/speedup/current.rb create mode 100644 etc/speedup/direct-stream.rb diff --git a/etc/coderay-lib.tmproj b/etc/coderay-lib.tmproj index 0c3eaa3..53b50b3 100644 --- a/etc/coderay-lib.tmproj +++ b/etc/coderay-lib.tmproj @@ -2,11 +2,11 @@ + currentDocument + speedup/current.rb documents - expanded - name lib regexFolderFilter @@ -32,9 +32,7 @@ filename ../Changes.textile lastUsed - 2010-01-01T06:18:17Z - selected - + 2010-01-01T07:33:23Z filename @@ -51,6 +49,8 @@ ../ftp.yaml + expanded + name etc regexFolderFilter @@ -97,8 +97,6 @@ 2010-01-01T05:57:27Z - expanded - name functional regexFolderFilter @@ -110,13 +108,13 @@ filename ../test/scanners/coderay_suite.rb lastUsed - 2010-01-01T06:09:10Z + 2010-01-06T09:17:36Z filename ../test/scanners/suite.rb lastUsed - 2010-01-01T05:50:18Z + 2010-01-04T01:36:09Z filename @@ -126,7 +124,7 @@ fileHierarchyDrawerWidth - 200 + 151 metaData ../lib/coderay/scanners/delphi.rb @@ -187,20 +185,6 @@ firstVisibleLine 0 - ../lib/coderay/scanners/json.rb - - caret - - column - 25 - line - 15 - - firstVisibleColumn - 0 - firstVisibleLine - 0 - ../lib/coderay/scanners/php.rb caret @@ -289,10 +273,59 @@ 76 + speedup/current.rb + + caret + + column + 38 + line + 115 + + firstVisibleColumn + 0 + firstVisibleLine + 95 + + speedup/direct-stream.rb + + caret + + column + 0 + line + 151 + + columnSelection + + firstVisibleColumn + 0 + firstVisibleLine + 139 + selectFrom + + column + 0 + line + 150 + + selectTo + + column + 0 + line + 151 + + + openDocuments + + speedup/direct-stream.rb + 
speedup/current.rb + showFileHierarchyDrawer windowFrame - {{203, 5}, {1067, 773}} + {{161, 4}, {1119, 774}} diff --git a/etc/speedup/current.rb b/etc/speedup/current.rb new file mode 100644 index 0000000..71acae6 --- /dev/null +++ b/etc/speedup/current.rb @@ -0,0 +1,132 @@ +require 'strscan' +require 'benchmark' + +class Scanner < StringScanner + + def initialize code + super code + @tokens = Tokens.new + end + + def tokenize + scan_tokens @tokens + @tokens + end + +protected + + def scan_tokens tokens + until eos? + if matched = scan(/\s+/) + tokens << [matched, :space] + elsif matched = scan(/!/) + tokens << [matched, :not_going_to_happen] + elsif matched = scan(/=/) + tokens << [matched, :not_going_to_happen] + elsif matched = scan(/%/) + tokens << [matched, :not_going_to_happen] + elsif matched = scan(/\w+/) + tokens << [matched, :word] + elsif matched = scan(/[,.]/) + tokens << [matched, :op] + elsif scan(/\(/) + tokens << [:open, :par] + elsif scan(/\)/) + tokens << [:close, :par] + else + raise 'Unexpected input at position %d' % pos # no pattern matched; say where scanning stopped + end + end + end + +end + + +class Tokens < Array +end + + +class Encoder + + def encode_tokens tokens + @out = '' + compile tokens + @out + end + +protected + + if RUBY_VERSION >= '1.9' || defined?(JRUBY_VERSION) + def compile tokens + for text, kind in tokens + token text, kind + end + end + else + def compile tokens + tokens.each(&method(:token).to_proc) + end + end + + def token content, kind + encoded_token = + case content + when ::String + text_token content, kind + when :open + open kind + when :close + close kind + when ::Symbol + block_token content, kind + else + raise 'Unknown token content type: %p' % [content] + end + @out << encoded_token + end + + def text_token text, kind + if kind == :space + text + else + text.gsub!(/[)\\]/, '\\\\\0') # escape ) and \ + "#{kind}(#{text})" + end + end + + def block_token action, kind + case action + when :open + open kind + when :close + close kind + end + end + + def open kind + "#{kind}<" + end + + def close kind 
+ '>' + end +end + +N = (10 ** (ARGV.first || 5).to_i) +code = " alpha, beta, (gamma).\n" * N +scanner = Scanner.new code +encoder = Encoder.new + +tokens = nil +time_scanning = Benchmark.realtime do + tokens = scanner.tokenize +end +puts 'Scanning: %0.2fs -- %0.0f kTok/s' % [time_scanning, tokens.size / time_scanning / 1000] + +time_encoding = Benchmark.realtime do + encoder.encode_tokens tokens # result is discarded; only the elapsed time matters +end +puts 'Encoding: %0.2fs -- %0.0f kTok/s' % [time_encoding, tokens.size / time_encoding / 1000] + +time = time_scanning + time_encoding +puts 'Together: %0.2fs -- %0.0f kTok/s' % [time, tokens.size / time / 1000] diff --git a/etc/speedup/direct-stream.rb b/etc/speedup/direct-stream.rb new file mode 100644 index 0000000..3c15511 --- /dev/null +++ b/etc/speedup/direct-stream.rb @@ -0,0 +1,174 @@ +require 'strscan' +require 'benchmark' + +class Scanner < StringScanner + + def initialize code + super code + end + + def tokenize encoder = Tokens.new + scan_tokens encoder + encoder + end + +protected + + def scan_tokens encoder + until eos? 
+ if matched = scan(/\s+/) + encoder.text_token matched, :space + elsif matched = scan(/!/) + encoder.text_token matched, :not_going_to_happen + elsif matched = scan(/=/) + encoder.text_token matched, :not_going_to_happen + elsif matched = scan(/%/) + encoder.text_token matched, :not_going_to_happen + elsif matched = scan(/\w+/) + encoder.text_token matched, :word + elsif matched = scan(/[,.]/) + encoder.text_token matched, :op + elsif scan(/\(/) + encoder.open :par + elsif scan(/\)/) + encoder.close :par + else + raise 'Unexpected input at position %d' % pos # no pattern matched; say where scanning stopped + end + end + end + +end + + +class Tokens < Array + alias token push + alias text_token push + alias block_token push + def open kind; push :open, kind end + def close kind; push :close, kind end +end + + +class Encoder + + def setup + @out = '' + @opened = [] + end + + def finish + until @opened.empty? # emit one closer per group that is still open + close @opened.last # close pops the entry itself, so only peek here + end + @out + end + + def encode_tokens tokens + setup + compile tokens + finish + end + + def encode_stream scanner + setup + scanner.tokenize self + finish + end + + def token content, kind + if content.is_a? ::String + text_token content, kind + elsif content.is_a? 
::Symbol + block_token content, kind + else + raise 'Unknown token content type: %p' % [content] + end + end + + def text_token text, kind + @out << + if kind == :space + text + else + text.gsub!(/[)\\]/, '\\\\\0') # escape ) and \ + "#{kind}(#{text})" + end + end + + def block_token action, kind + case action + when :open + open kind + when :close + close kind + else + raise + end + end + + def open kind + @opened << kind + @out << "#{kind}<" + end + + def close kind + @opened.pop + @out << '>' + end + +protected + + def compile tokens + content = nil + for item in tokens + if content + case content + when ::String + text_token content, item + content = nil + when :open + open item + content = nil + when :close + close item + content = nil + when ::Symbol + block_token content, item # item is the kind paired with the pending content + content = nil + else + raise + end + else + content = item + end + end + raise if content + end + +end + +N = (10 ** (ARGV.first || 5).to_i) +code = " alpha, beta, (gamma).\n" * N +scanner = Scanner.new code +encoder = Encoder.new + +tokens = nil +time_scanning = Benchmark.realtime do + tokens = scanner.tokenize +end +puts 'Scanning: %0.2fs -- %0.0f kTok/s' % [time_scanning, tokens.size / 2 / time_scanning / 1000] + +time_encoding = Benchmark.realtime do + encoder.encode_tokens tokens +end +puts 'Encoding: %0.2fs -- %0.0f kTok/s' % [time_encoding, tokens.size / 2 / time_encoding / 1000] + +time = time_scanning + time_encoding +puts 'Together: %0.2fs -- %0.0f kTok/s' % [time, tokens.size / 2 / time / 1000] + +scanner.reset +time = Benchmark.realtime do + encoder.encode_stream scanner +end +puts 'Scanning + Encoding: %0.2fs -- %0.0f kTok/s' % [time, (N * 11 + 1) / time / 1000] -- cgit v1.2.1