From 9f4c7ab7553f9be7c9d14da0ba7462ad746c2f5d Mon Sep 17 00:00:00 2001 From: murphy Date: Mon, 10 Jul 2006 00:32:57 +0000 Subject: Big re-indenting - no more tabs! --- lib/coderay/duo.rb | 50 +-- lib/coderay/encoder.rb | 340 ++++++++-------- lib/coderay/encoders/_map.rb | 8 +- lib/coderay/encoders/count.rb | 24 +- lib/coderay/encoders/debug.rb | 80 ++-- lib/coderay/encoders/div.rb | 20 +- lib/coderay/encoders/html.rb | 478 +++++++++++----------- lib/coderay/encoders/html/classes.rb | 128 +++--- lib/coderay/encoders/html/css.rb | 108 ++--- lib/coderay/encoders/html/numerization.rb | 232 +++++------ lib/coderay/encoders/html/output.rb | 326 +++++++-------- lib/coderay/encoders/null.rb | 30 +- lib/coderay/encoders/page.rb | 22 +- lib/coderay/encoders/span.rb | 18 +- lib/coderay/encoders/statistic.rb | 124 +++--- lib/coderay/encoders/text.rb | 40 +- lib/coderay/encoders/tokens.rb | 68 ++-- lib/coderay/encoders/xml.rb | 114 +++--- lib/coderay/encoders/yaml.rb | 24 +- lib/coderay/helpers/filetype.rb | 305 +++++++------- lib/coderay/helpers/gzip_simple.rb | 135 ++++--- lib/coderay/helpers/plugin.rb | 592 +++++++++++++-------------- lib/coderay/helpers/word_list.rb | 93 +++-- lib/coderay/scanner.rb | 406 +++++++++---------- lib/coderay/scanners/_map.rb | 18 +- lib/coderay/scanners/c.rb | 300 +++++++------- lib/coderay/scanners/delphi.rb | 226 +++++------ lib/coderay/scanners/html.rb | 322 +++++++-------- lib/coderay/scanners/nitro_html.rb | 238 +++++------ lib/coderay/scanners/plaintext.rb | 14 +- lib/coderay/scanners/rhtml.rb | 118 +++--- lib/coderay/scanners/ruby.rb | 32 +- lib/coderay/scanners/ruby/patterns.rb | 14 +- lib/coderay/scanners/xml.rb | 20 +- lib/coderay/style.rb | 24 +- lib/coderay/styles/_map.rb | 6 +- lib/coderay/styles/cycnus.rb | 36 +- lib/coderay/styles/murphy.rb | 36 +- lib/coderay/tokens.rb | 636 +++++++++++++++--------------- 39 files changed, 2901 insertions(+), 2904 deletions(-) (limited to 'lib/coderay') diff --git a/lib/coderay/duo.rb b/lib/coderay/duo.rb index e4e80df..3125568 100644 --- a/lib/coderay/duo.rb +++ b/lib/coderay/duo.rb @@ -1,29 +1,29 @@ module CodeRay - - # = Duo - # - # $Id: scanner.rb 123 2006-03-21 14:46:34Z murphy $ - # - # TODO: Doc. - class Duo - - attr_accessor :scanner, :encoder - - def initialize lang, format, options = {} - @scanner = CodeRay.scanner lang, CodeRay.get_scanner_options(options) - @encoder = CodeRay.encoder format, options - end + + # = Duo + # + # $Id: scanner.rb 123 2006-03-21 14:46:34Z murphy $ + # + # TODO: Doc. + class Duo - class << self - alias [] new - end + attr_accessor :scanner, :encoder + + def initialize lang, format, options = {} + @scanner = CodeRay.scanner lang, CodeRay.get_scanner_options(options) + @encoder = CodeRay.encoder format, options + end + + class << self + alias [] new + end + + def encode code + @scanner.string = code + @encoder.encode_tokens(scanner.tokenize) + end + alias highlight encode + + end - def encode code - @scanner.string = code - @encoder.encode_tokens(scanner.tokenize) - end - alias highlight encode - - end - end diff --git a/lib/coderay/encoder.rb b/lib/coderay/encoder.rb index fd809d5..1065a9c 100644 --- a/lib/coderay/encoder.rb +++ b/lib/coderay/encoder.rb @@ -1,173 +1,173 @@ module CodeRay - # This module holds the Encoder class and its subclasses. - # For example, the HTML encoder is named CodeRay::Encoders::HTML - # can be found in coderay/encoders/html. - # - # Encoders also provides methods and constants for the register - # mechanism and the [] method that returns the Encoder class - # belonging to the given format. - module Encoders - extend PluginHost - plugin_path File.dirname(__FILE__), 'encoders' - - # = Encoder - # - # The Encoder base class. Together with Scanner and - # Tokens, it forms the highlighting triad. - # - # Encoder instances take a Tokens object and do something with it. - # - # The most common Encoder is surely the HTML encoder - # (CodeRay::Encoders::HTML). It highlights the code in a colorful - # html page. - # If you want the highlighted code in a div or a span instead, - # use its subclasses Div and Span. - class Encoder - extend Plugin - plugin_host Encoders - - attr_reader :token_stream - - class << self - - # Returns if the Encoder can be used in streaming mode. - def streamable? - is_a? Streamable - end - - # If FILE_EXTENSION isn't defined, this method returns the - # downcase class name instead. - def const_missing sym - if sym == :FILE_EXTENSION - sym.to_s.downcase - else - super - end - end - - end - - # Subclasses are to store their default options in this constant. - DEFAULT_OPTIONS = { :stream => false } - - # The options you gave the Encoder at creating. - attr_accessor :options - - # Creates a new Encoder. - # +options+ is saved and used for all encode operations, as long - # as you don't overwrite it there by passing additional options. - # - # Encoder objects provide three encode methods: - # - encode simply takes a +code+ string and a +lang+ - # - encode_tokens expects a +tokens+ object instead - # - encode_stream is like encode, but uses streaming mode. - # - # Each method has an optional +options+ parameter. These are - # added to the options you passed at creation. - def initialize options = {} - @options = self.class::DEFAULT_OPTIONS.merge options - raise "I am only the basic Encoder class. I can't encode "\ - "anything. :( Use my subclasses." if self.class == Encoder - end - - # Encode a Tokens object. - def encode_tokens tokens, options = {} - options = @options.merge options - setup options - compile tokens, options - finish options - end - - # Encode the given +code+ after tokenizing it using the Scanner - # for +lang+. - def encode code, lang, options = {} - options = @options.merge options - scanner_options = CodeRay.get_scanner_options(options) - tokens = CodeRay.scan code, lang, scanner_options - encode_tokens tokens, options - end - - # You can use highlight instead of encode, if that seems - # more clear to you. - alias highlight encode - - # Encode the given +code+ using the Scanner for +lang+ in - # streaming mode. - def encode_stream code, lang, options = {} - raise NotStreamableError, self unless kind_of? Streamable - options = @options.merge options - setup options - scanner_options = CodeRay.get_scanner_options options - @token_stream = - CodeRay.scan_stream code, lang, scanner_options, &self - finish options - end - - # Behave like a proc. The token method is converted to a proc. - def to_proc - method(:token).to_proc - end - - # Return the default file extension for outputs of this encoder. - def file_extension - self.class::FILE_EXTENSION - end - - protected - - # Called with merged options before encoding starts. - # Sets @out to an empty string. - # - # See the HTML Encoder for an example of option caching. - def setup options - @out = '' - end - - # Called with +text+ and +kind+ of the currently scanned token. - # For simple scanners, it's enougth to implement this method. - # - # By default, it calls text_token or block_token, depending on - # whether +text+ is a String. - def token text, kind - if text.is_a? ::String - text_token text, kind - elsif text.is_a? ::Symbol - block_token text, kind - else - raise 'Unknown token text type: %p' % text - end - end - - def text_token text, kind - end - - def block_token action, kind - case action - when :open - open_token kind - when :close - close_token kind - else - raise 'unknown block action: %p' % action - end - end - - # Called with merged options after encoding starts. - # The return value is the result of encoding, typically @out. - def finish options - @out - end - - # Do the encoding. - # - # The already created +tokens+ object must be used; it can be a - # TokenStream or a Tokens object. - def compile tokens, options - tokens.each(&self) - end - - end - - end + # This module holds the Encoder class and its subclasses. + # For example, the HTML encoder is named CodeRay::Encoders::HTML + # can be found in coderay/encoders/html. + # + # Encoders also provides methods and constants for the register + # mechanism and the [] method that returns the Encoder class + # belonging to the given format. + module Encoders + extend PluginHost + plugin_path File.dirname(__FILE__), 'encoders' + + # = Encoder + # + # The Encoder base class. Together with Scanner and + # Tokens, it forms the highlighting triad. + # + # Encoder instances take a Tokens object and do something with it. + # + # The most common Encoder is surely the HTML encoder + # (CodeRay::Encoders::HTML). It highlights the code in a colorful + # html page. + # If you want the highlighted code in a div or a span instead, + # use its subclasses Div and Span. + class Encoder + extend Plugin + plugin_host Encoders + + attr_reader :token_stream + + class << self + + # Returns if the Encoder can be used in streaming mode. + def streamable? + is_a? Streamable + end + + # If FILE_EXTENSION isn't defined, this method returns the + # downcase class name instead. + def const_missing sym + if sym == :FILE_EXTENSION + sym.to_s.downcase + else + super + end + end + + end + + # Subclasses are to store their default options in this constant. + DEFAULT_OPTIONS = { :stream => false } + + # The options you gave the Encoder at creating. + attr_accessor :options + + # Creates a new Encoder. + # +options+ is saved and used for all encode operations, as long + # as you don't overwrite it there by passing additional options. + # + # Encoder objects provide three encode methods: + # - encode simply takes a +code+ string and a +lang+ + # - encode_tokens expects a +tokens+ object instead + # - encode_stream is like encode, but uses streaming mode. + # + # Each method has an optional +options+ parameter. These are + # added to the options you passed at creation. + def initialize options = {} + @options = self.class::DEFAULT_OPTIONS.merge options + raise "I am only the basic Encoder class. I can't encode "\ + "anything. :( Use my subclasses." if self.class == Encoder + end + + # Encode a Tokens object. + def encode_tokens tokens, options = {} + options = @options.merge options + setup options + compile tokens, options + finish options + end + + # Encode the given +code+ after tokenizing it using the Scanner + # for +lang+. + def encode code, lang, options = {} + options = @options.merge options + scanner_options = CodeRay.get_scanner_options(options) + tokens = CodeRay.scan code, lang, scanner_options + encode_tokens tokens, options + end + + # You can use highlight instead of encode, if that seems + # more clear to you. + alias highlight encode + + # Encode the given +code+ using the Scanner for +lang+ in + # streaming mode. + def encode_stream code, lang, options = {} + raise NotStreamableError, self unless kind_of? Streamable + options = @options.merge options + setup options + scanner_options = CodeRay.get_scanner_options options + @token_stream = + CodeRay.scan_stream code, lang, scanner_options, &self + finish options + end + + # Behave like a proc. The token method is converted to a proc. + def to_proc + method(:token).to_proc + end + + # Return the default file extension for outputs of this encoder. + def file_extension + self.class::FILE_EXTENSION + end + + protected + + # Called with merged options before encoding starts. + # Sets @out to an empty string. + # + # See the HTML Encoder for an example of option caching. + def setup options + @out = '' + end + + # Called with +text+ and +kind+ of the currently scanned token. + # For simple scanners, it's enougth to implement this method. + # + # By default, it calls text_token or block_token, depending on + # whether +text+ is a String. + def token text, kind + if text.is_a? ::String + text_token text, kind + elsif text.is_a? ::Symbol + block_token text, kind + else + raise 'Unknown token text type: %p' % text + end + end + + def text_token text, kind + end + + def block_token action, kind + case action + when :open + open_token kind + when :close + close_token kind + else + raise 'unknown block action: %p' % action + end + end + + # Called with merged options after encoding starts. + # The return value is the result of encoding, typically @out. + def finish options + @out + end + + # Do the encoding. + # + # The already created +tokens+ object must be used; it can be a + # TokenStream or a Tokens object. + def compile tokens, options + tokens.each(&self) + end + + end + + end end diff --git a/lib/coderay/encoders/_map.rb b/lib/coderay/encoders/_map.rb index 13c4a9d..a22a951 100644 --- a/lib/coderay/encoders/_map.rb +++ b/lib/coderay/encoders/_map.rb @@ -1,8 +1,8 @@ module CodeRay module Encoders - - map :stats => :statistic, - :plain => :text - + + map :stats => :statistic, + :plain => :text + end end diff --git a/lib/coderay/encoders/count.rb b/lib/coderay/encoders/count.rb index c662ead..6885541 100644 --- a/lib/coderay/encoders/count.rb +++ b/lib/coderay/encoders/count.rb @@ -1,21 +1,21 @@ module CodeRay module Encoders - class Count < Encoder + class Count < Encoder - include Streamable - register_for :count + include Streamable + register_for :count - protected + protected - def setup options - @out = 0 - end + def setup options + @out = 0 + end - def token text, kind - @out += 1 - end - end + def token text, kind + @out += 1 + end + end -end +end end diff --git a/lib/coderay/encoders/debug.rb b/lib/coderay/encoders/debug.rb index d6aac8b..2639e1f 100644 --- a/lib/coderay/encoders/debug.rb +++ b/lib/coderay/encoders/debug.rb @@ -1,46 +1,46 @@ module CodeRay module Encoders - # = Debug Encoder - # - # Fast encoder producing simple debug output. - # - # It is readable and diff-able and is used for testing. - # - # You cannot fully restore the tokens information from the - # output, because consecutive :space tokens are merged. - # Use Tokens#dump for caching purposes. - class Debug < Encoder - - include Streamable - register_for :debug - - FILE_EXTENSION = 'raydebug' - - protected - def text_token text, kind - @out << - if kind == :space - text - else - text = text.gsub(/[)\\]/, '\\\\\0') - "#{kind}(#{text})" - end - end - - def block_token action, kind - @out << super - end - - def open_token kind - "#{kind}<" - end - - def close_token kind - ">" - end - - end + # = Debug Encoder + # + # Fast encoder producing simple debug output. + # + # It is readable and diff-able and is used for testing. + # + # You cannot fully restore the tokens information from the + # output, because consecutive :space tokens are merged. + # Use Tokens#dump for caching purposes. + class Debug < Encoder + + include Streamable + register_for :debug + + FILE_EXTENSION = 'raydebug' + + protected + def text_token text, kind + @out << + if kind == :space + text + else + text = text.gsub(/[)\\]/, '\\\\\0') + "#{kind}(#{text})" + end + end + + def block_token action, kind + @out << super + end + + def open_token kind + "#{kind}<" + end + + def close_token kind + ">" + end + + end end end diff --git a/lib/coderay/encoders/div.rb b/lib/coderay/encoders/div.rb index c389a0d..ce558f2 100644 --- a/lib/coderay/encoders/div.rb +++ b/lib/coderay/encoders/div.rb @@ -1,20 +1,20 @@ module CodeRay module Encoders - - load :html - class Div < HTML + load :html - FILE_EXTENSION = 'div.html' + class Div < HTML - register_for :div + FILE_EXTENSION = 'div.html' - DEFAULT_OPTIONS = HTML::DEFAULT_OPTIONS.merge({ - :css => :style, - :wrap => :div, - }) + register_for :div - end + DEFAULT_OPTIONS = HTML::DEFAULT_OPTIONS.merge({ + :css => :style, + :wrap => :div, + }) + + end end end diff --git a/lib/coderay/encoders/html.rb b/lib/coderay/encoders/html.rb index 15ccbc7..60c56c1 100644 --- a/lib/coderay/encoders/html.rb +++ b/lib/coderay/encoders/html.rb @@ -1,245 +1,245 @@ module CodeRay module Encoders - # = HTML Encoder - # - # This is CodeRay's most important highlighter: - # It provides save, fast XHTML generation and CSS support. - # - # == Usage - # - # require 'coderay' - # puts CodeRay.scan('Some /code/', :ruby).html #-> a HTML page - # puts CodeRay.scan('Some /code/', :ruby).html(:wrap => :span) #-> Some /code/ - # puts CodeRay.scan('Some /code/', :ruby).span #-> the same - # - # puts CodeRay.scan('Some code', :ruby).html( - # :wrap => nil, - # :line_numbers => :inline, - # :css => :style - # ) - # #-> 1 Some code - # - # == Options - # - # === :tab_width - # Convert \t characters to +n+ spaces (a number.) - # Default: 8 - # - # === :css - # How to include the styles; can be :class or :style. - # - # Default: :class - # - # === :wrap - # Wrap in :page, :div, :span or nil. - # - # You can also use Encoders::Div and Encoders::Span. - # - # Default: nil - # - # === :line_numbers - # Include line numbers in :table, :inline, :list or nil (no line numbers) - # - # Default: nil - # - # === :line_number_start - # Where to start with line number counting. - # - # Default: 1 - # - # === :bold_every - # Make every +n+-th number appear bold. - # - # Default: 10 - # - # === :hint - # Include some information into the output using the title attribute. - # Can be :info (show token type on mouse-over), :info_long (with full path) or :debug (via inspect). - # - # Default: false - class HTML < Encoder - - include Streamable - register_for :html - - FILE_EXTENSION = 'html' - - DEFAULT_OPTIONS = { - :tab_width => 8, - - :level => :xhtml, - :css => :class, - - :style => :cycnus, - - :wrap => nil, - - :line_numbers => nil, - :line_number_start => 1, - :bold_every => 10, - - :hint => false, - } - - helper :classes, :output, :css - - attr_reader :css - - protected - - HTML_ESCAPE = { #:nodoc: - '&' => '&', - '"' => '"', - '>' => '>', - '<' => '<', - } - - # This was to prevent illegal HTML. - # Strange chars should still be avoided in codes. - evil_chars = Array(0x00...0x20) - [?\n, ?\t, ?\s] - evil_chars.each { |i| HTML_ESCAPE[i.chr] = ' ' } - #ansi_chars = Array(0x7f..0xff) - #ansi_chars.each { |i| HTML_ESCAPE[i.chr] = '&#%d;' % i } - # \x9 (\t) and \xA (\n) not included - #HTML_ESCAPE_PATTERN = /[\t&"><\0-\x8\xB-\x1f\x7f-\xff]/ - HTML_ESCAPE_PATTERN = /[\t"&><\0-\x8\xB-\x1f]/ - - TOKEN_KIND_TO_INFO = Hash.new { |h, kind| - h[kind] = - case kind - when :pre_constant - 'Predefined constant' - else - kind.to_s.gsub(/_/, ' ').gsub(/\b\w/) { $&.capitalize } - end - } - - # Generate a hint about the given +classes+ in a +hint+ style. - # - # +hint+ may be :info, :info_long or :debug. - def self.token_path_to_hint hint, classes - return '' unless hint - title = - case hint - when :info - TOKEN_KIND_TO_INFO[classes.first] - when :info_long - classes.reverse.map { |kind| TOKEN_KIND_TO_INFO[kind] }.join('/') - when :debug - classes.inspect - end - " title=\"#{title}\"" - end - - def setup options - super - - @HTML_ESCAPE = HTML_ESCAPE.dup - @HTML_ESCAPE["\t"] = ' ' * options[:tab_width] - - @opened = [nil] - @css = CSS.new options[:style] - - hint = options[:hint] - if hint and not [:debug, :info, :info_long].include? hint - raise ArgumentError, "Unknown value %p for :hint; expected :info, :debug, false or nil." % hint - end - - case options[:css] - - when :class - @css_style = Hash.new do |h, k| - if k.is_a? Array - type = k.first - else - type = k - end - c = ClassOfKind[type] - if c == :NO_HIGHLIGHT and not hint - h[k] = false - else - title = HTML.token_path_to_hint hint, (k[1..-1] << k.first) - h[k] = '' % [title, c] - end - end - - when :style - @css_style = Hash.new do |h, k| - if k.is_a? Array - styles = k.dup - else - styles = [k] - end - type = styles.first - classes = styles.map { |c| ClassOfKind[c] } - if classes.first == :NO_HIGHLIGHT and not hint - h[k] = false - else - styles.shift if [:delimiter, :modifier, :content, :escape].include? styles.first - title = HTML.token_path_to_hint hint, styles - classes.delete 'il' - style = @css[*classes] - h[k] = - if style - '' % [title, style] - else - false - end - end - end - - else - raise ArgumentError, "Unknown value %p for :css." % options[:css] - - end - end - - def finish options - not_needed = @opened.shift - @out << '' * @opened.size - warn '%d tokens still open' % @opened.size unless @opened.empty? - - @out.extend Output - @out.css = @css - @out.numerize! options[:line_numbers], options - @out.wrap! options[:wrap] - - super - end - - def token text, type - if text.is_a? ::String - if text =~ /#{HTML_ESCAPE_PATTERN}/o - text = text.gsub(/#{HTML_ESCAPE_PATTERN}/o) { |m| @HTML_ESCAPE[m] } - end - @opened[0] = type - if style = @css_style[@opened] - @out << style << text << '' - else - @out << text - end - else - case text - when :open - @opened[0] = type - @out << (@css_style[@opened] || '') - @opened << type - when :close - unless @opened.empty? - raise 'Malformed token stream: Trying to close a token that was never opened.' unless @opened.size > 1 - @out << '' - @opened.pop - end - when nil - raise 'Token with nil as text was given: %p' % [[text, type]] - else - raise 'unknown token kind: %p' % text - end - end - end - - end + # = HTML Encoder + # + # This is CodeRay's most important highlighter: + # It provides save, fast XHTML generation and CSS support. + # + # == Usage + # + # require 'coderay' + # puts CodeRay.scan('Some /code/', :ruby).html #-> a HTML page + # puts CodeRay.scan('Some /code/', :ruby).html(:wrap => :span) #-> Some /code/ + # puts CodeRay.scan('Some /code/', :ruby).span #-> the same + # + # puts CodeRay.scan('Some code', :ruby).html( + # :wrap => nil, + # :line_numbers => :inline, + # :css => :style + # ) + # #-> 1 Some code + # + # == Options + # + # === :tab_width + # Convert \t characters to +n+ spaces (a number.) + # Default: 8 + # + # === :css + # How to include the styles; can be :class or :style. + # + # Default: :class + # + # === :wrap + # Wrap in :page, :div, :span or nil. + # + # You can also use Encoders::Div and Encoders::Span. + # + # Default: nil + # + # === :line_numbers + # Include line numbers in :table, :inline, :list or nil (no line numbers) + # + # Default: nil + # + # === :line_number_start + # Where to start with line number counting. + # + # Default: 1 + # + # === :bold_every + # Make every +n+-th number appear bold. + # + # Default: 10 + # + # === :hint + # Include some information into the output using the title attribute. + # Can be :info (show token type on mouse-over), :info_long (with full path) or :debug (via inspect). + # + # Default: false + class HTML < Encoder + + include Streamable + register_for :html + + FILE_EXTENSION = 'html' + + DEFAULT_OPTIONS = { + :tab_width => 8, + + :level => :xhtml, + :css => :class, + + :style => :cycnus, + + :wrap => nil, + + :line_numbers => nil, + :line_number_start => 1, + :bold_every => 10, + + :hint => false, + } + + helper :classes, :output, :css + + attr_reader :css + + protected + + HTML_ESCAPE = { #:nodoc: + '&' => '&', + '"' => '"', + '>' => '>', + '<' => '<', + } + + # This was to prevent illegal HTML. + # Strange chars should still be avoided in codes. + evil_chars = Array(0x00...0x20) - [?\n, ?\t, ?\s] + evil_chars.each { |i| HTML_ESCAPE[i.chr] = ' ' } + #ansi_chars = Array(0x7f..0xff) + #ansi_chars.each { |i| HTML_ESCAPE[i.chr] = '&#%d;' % i } + # \x9 (\t) and \xA (\n) not included + #HTML_ESCAPE_PATTERN = /[\t&"><\0-\x8\xB-\x1f\x7f-\xff]/ + HTML_ESCAPE_PATTERN = /[\t"&><\0-\x8\xB-\x1f]/ + + TOKEN_KIND_TO_INFO = Hash.new { |h, kind| + h[kind] = + case kind + when :pre_constant + 'Predefined constant' + else + kind.to_s.gsub(/_/, ' ').gsub(/\b\w/) { $&.capitalize } + end + } + + # Generate a hint about the given +classes+ in a +hint+ style. + # + # +hint+ may be :info, :info_long or :debug. + def self.token_path_to_hint hint, classes + return '' unless hint + title = + case hint + when :info + TOKEN_KIND_TO_INFO[classes.first] + when :info_long + classes.reverse.map { |kind| TOKEN_KIND_TO_INFO[kind] }.join('/') + when :debug + classes.inspect + end + " title=\"#{title}\"" + end + + def setup options + super + + @HTML_ESCAPE = HTML_ESCAPE.dup + @HTML_ESCAPE["\t"] = ' ' * options[:tab_width] + + @opened = [nil] + @css = CSS.new options[:style] + + hint = options[:hint] + if hint and not [:debug, :info, :info_long].include? hint + raise ArgumentError, "Unknown value %p for :hint; expected :info, :debug, false or nil." % hint + end + + case options[:css] + + when :class + @css_style = Hash.new do |h, k| + if k.is_a? Array + type = k.first + else + type = k + end + c = ClassOfKind[type] + if c == :NO_HIGHLIGHT and not hint + h[k] = false + else + title = HTML.token_path_to_hint hint, (k[1..-1] << k.first) + h[k] = '' % [title, c] + end + end + + when :style + @css_style = Hash.new do |h, k| + if k.is_a? Array + styles = k.dup + else + styles = [k] + end + type = styles.first + classes = styles.map { |c| ClassOfKind[c] } + if classes.first == :NO_HIGHLIGHT and not hint + h[k] = false + else + styles.shift if [:delimiter, :modifier, :content, :escape].include? styles.first + title = HTML.token_path_to_hint hint, styles + classes.delete 'il' + style = @css[*classes] + h[k] = + if style + '' % [title, style] + else + false + end + end + end + + else + raise ArgumentError, "Unknown value %p for :css." % options[:css] + + end + end + + def finish options + not_needed = @opened.shift + @out << '' * @opened.size + warn '%d tokens still open' % @opened.size unless @opened.empty? + + @out.extend Output + @out.css = @css + @out.numerize! options[:line_numbers], options + @out.wrap! options[:wrap] + + super + end + + def token text, type + if text.is_a? ::String + if text =~ /#{HTML_ESCAPE_PATTERN}/o + text = text.gsub(/#{HTML_ESCAPE_PATTERN}/o) { |m| @HTML_ESCAPE[m] } + end + @opened[0] = type + if style = @css_style[@opened] + @out << style << text << '' + else + @out << text + end + else + case text + when :open + @opened[0] = type + @out << (@css_style[@opened] || '') + @opened << type + when :close + unless @opened.empty? + raise 'Malformed token stream: Trying to close a token that was never opened.' unless @opened.size > 1 + @out << '' + @opened.pop + end + when nil + raise 'Token with nil as text was given: %p' % [[text, type]] + else + raise 'unknown token kind: %p' % text + end + end + end + + end end end diff --git a/lib/coderay/encoders/html/classes.rb b/lib/coderay/encoders/html/classes.rb index e21fce8..8493fa0 100644 --- a/lib/coderay/encoders/html/classes.rb +++ b/lib/coderay/encoders/html/classes.rb @@ -1,73 +1,73 @@ module CodeRay module Encoders - class HTML + class HTML - ClassOfKind = { - :attribute_name => 'an', - :attribute_name_fat => 'af', - :attribute_value => 'av', - :attribute_value_fat => 'aw', - :bin => 'bi', - :char => 'ch', - :class => 'cl', - :class_variable => 'cv', - :color => 'cr', - :comment => 'c', - :constant => 'co', - :content => 'k', - :definition => 'df', - :delimiter => 'dl', - :directive => 'di', - :doc => 'do', - :doc_string => 'ds', - :entity => 'en', - :error => 'er', - :escape => 'e', - :exception => 'ex', - :float => 'fl', - :function => 'fu', - :global_variable => 'gv', - :hex => 'hx', - :include => 'ic', - :inline => 'il', - :instance_variable => 'iv', - :integer => 'i', - :interpreted => 'in', - :label => 'la', - :local_variable => 'lv', - :modifier => 'mod', - :oct => 'oc', - :operator_name => 'on', - :pre_constant => 'pc', - :pre_type => 'pt', - :predefined => 'pd', - :preprocessor => 'pp', - :regexp => 'rx', - :reserved => 'r', - :shell => 'sh', - :string => 's', - :symbol => 'sy', - :tag => 'ta', - :tag_fat => 'tf', - :tag_special => 'ts', - :type => 'ty', - :variable => 'v', - :xml_text => 'xt', + ClassOfKind = { + :attribute_name => 'an', + :attribute_name_fat => 'af', + :attribute_value => 'av', + :attribute_value_fat => 'aw', + :bin => 'bi', + :char => 'ch', + :class => 'cl', + :class_variable => 'cv', + :color => 'cr', + :comment => 'c', + :constant => 'co', + :content => 'k', + :definition => 'df', + :delimiter => 'dl', + :directive => 'di', + :doc => 'do', + :doc_string => 'ds', + :entity => 'en', + :error => 'er', + :escape => 'e', + :exception => 'ex', + :float => 'fl', + :function => 'fu', + :global_variable => 'gv', + :hex => 'hx', + :include => 'ic', + :inline => 'il', + :instance_variable => 'iv', + :integer => 'i', + :interpreted => 'in', + :label => 'la', + :local_variable => 'lv', + :modifier => 'mod', + :oct => 'oc', + :operator_name => 'on', + :pre_constant => 'pc', + :pre_type => 'pt', + :predefined => 'pd', + :preprocessor => 'pp', + :regexp => 'rx', + :reserved => 'r', + :shell => 'sh', + :string => 's', + :symbol => 'sy', + :tag => 'ta', + :tag_fat => 'tf', + :tag_special => 'ts', + :type => 'ty', + :variable => 'v', + :xml_text => 'xt', - :ident => :NO_HIGHLIGHT, # 'id' - #:operator => 'op', - :operator => :NO_HIGHLIGHT, # 'op' - :space => :NO_HIGHLIGHT, # 'sp' - :plain => :NO_HIGHLIGHT, - } - ClassOfKind[:procedure] = ClassOfKind[:method] = ClassOfKind[:function] - ClassOfKind[:open] = ClassOfKind[:close] = ClassOfKind[:delimiter] - ClassOfKind[:nesting_delimiter] = ClassOfKind[:delimiter] - ClassOfKind[:escape] = ClassOfKind[:delimiter] - ClassOfKind.default = ClassOfKind[:error] or raise 'no class found for :error!' + :ident => :NO_HIGHLIGHT, # 'id' + #:operator => 'op', + :operator => :NO_HIGHLIGHT, # 'op' + :space => :NO_HIGHLIGHT, # 'sp' + :plain => :NO_HIGHLIGHT, + } + ClassOfKind[:procedure] = ClassOfKind[:method] = ClassOfKind[:function] + ClassOfKind[:open] = ClassOfKind[:close] = ClassOfKind[:delimiter] + ClassOfKind[:nesting_delimiter] = ClassOfKind[:delimiter] + ClassOfKind[:escape] = ClassOfKind[:delimiter] + ClassOfKind.default = ClassOfKind[:error] or raise 'no class found for :error!' - end + end end end diff --git a/lib/coderay/encoders/html/css.rb b/lib/coderay/encoders/html/css.rb index fcd9859..b76d682 100644 --- a/lib/coderay/encoders/html/css.rb +++ b/lib/coderay/encoders/html/css.rb @@ -1,65 +1,65 @@ module CodeRay module Encoders - class HTML - class CSS + class HTML + class CSS - attr :stylesheet + attr :stylesheet - def CSS.load_stylesheet style = nil - CodeRay::Styles[style] - end - - def initialize style = :default - @classes = Hash.new - style = CSS.load_stylesheet style - @stylesheet = [ - style::CSS_MAIN_STYLES, - style::TOKEN_COLORS.gsub(/^(?!$)/, '.CodeRay ') - ].join("\n") - parse style::TOKEN_COLORS - end + def CSS.load_stylesheet style = nil + CodeRay::Styles[style] + end + + def initialize style = :default + @classes = Hash.new + style = CSS.load_stylesheet style + @stylesheet = [ + style::CSS_MAIN_STYLES, + style::TOKEN_COLORS.gsub(/^(?!$)/, '.CodeRay ') + ].join("\n") + parse style::TOKEN_COLORS + end + + def [] *styles + cl = @classes[styles.first] + return '' unless cl + style = '' + 1.upto(styles.size) do |offset| + break if style = cl[styles[offset .. -1]] + end + raise 'Style not found: %p' % [styles] if $DEBUG and style.empty? + return style + end + + private + + CSS_CLASS_PATTERN = / + ( (?: # $1 = classes + \s* \. [-\w]+ + )+ ) + \s* \{ \s* + ( [^\}]+ )? # $2 = style + \s* \} \s* + | + ( . ) # $3 = error + /mx + def parse stylesheet + stylesheet.scan CSS_CLASS_PATTERN do |classes, style, error| + raise "CSS parse error: '#{error.inspect}' not recognized" if error + styles = classes.scan(/[-\w]+/) + cl = styles.pop + @classes[cl] ||= Hash.new + @classes[cl][styles] = style.to_s.strip + end + end + + end + end - def [] *styles - cl = @classes[styles.first] - return '' unless cl - style = '' - 1.upto(styles.size) do |offset| - break if style = cl[styles[offset .. -1]] - end - raise 'Style not found: %p' % [styles] if $DEBUG and style.empty? - return style - end - - private - - CSS_CLASS_PATTERN = / - ( (?: # $1 = classes - \s* \. [-\w]+ - )+ ) - \s* \{ \s* - ( [^\}]+ )? # $2 = style - \s* \} \s* - | - ( . ) # $3 = error - /mx - def parse stylesheet - stylesheet.scan CSS_CLASS_PATTERN do |classes, style, error| - raise "CSS parse error: '#{error.inspect}' not recognized" if error - styles = classes.scan(/[-\w]+/) - cl = styles.pop - @classes[cl] ||= Hash.new - @classes[cl][styles] = style.to_s.strip - end - end - - end - end - end end if $0 == __FILE__ - require 'pp' - pp CodeRay::Encoders::HTML::CSS.new + require 'pp' + pp CodeRay::Encoders::HTML::CSS.new end diff --git a/lib/coderay/encoders/html/numerization.rb b/lib/coderay/encoders/html/numerization.rb index 1e1f952..2960f87 100644 --- a/lib/coderay/encoders/html/numerization.rb +++ b/lib/coderay/encoders/html/numerization.rb @@ -1,122 +1,122 @@ module CodeRay module Encoders - class HTML - - module Output - - def numerize *args - clone.numerize!(*args) - end - -=begin NUMERIZABLE_WRAPPINGS = { - :table => [:div, :page, nil], - :inline => :all, - :list => [:div, :page, nil] - } - NUMERIZABLE_WRAPPINGS.default = :all -=end - def numerize! mode = :table, options = {} - return self unless mode - - options = DEFAULT_OPTIONS.merge options - - start = options[:line_number_start] - unless start.is_a? Integer - raise ArgumentError, "Invalid value %p for :line_number_start; Integer expected." % start - end - - #allowed_wrappings = NUMERIZABLE_WRAPPINGS[mode] - #unless allowed_wrappings == :all or allowed_wrappings.include? options[:wrap] - # raise ArgumentError, "Can't numerize, :wrap must be in %p, but is %p" % [NUMERIZABLE_WRAPPINGS, options[:wrap]] - #end - - bold_every = options[:bold_every] - bolding = - if bold_every == false - proc { |line| line.to_s } - elsif bold_every.is_a? Integer - raise ArgumentError, ":bolding can't be 0." if bold_every == 0 - proc do |line| - if line % bold_every == 0 - "#{line}" # every bold_every-th number in bold - else - line.to_s - end - end - else - raise ArgumentError, 'Invalid value %p for :bolding; false or Integer expected.' % bold_every - end - - case mode - when :inline - max_width = (start + line_count).to_s.size - line = start - gsub!(/^/) do - line_number = bolding.call line - indent = ' ' * (max_width - line.to_s.size) - res = "#{indent}#{line_number} " - line += 1 - res - end - - when :table - # This is really ugly. - # Because even monospace fonts seem to have different heights when bold, - # I make the newline bold, both in the code and the line numbers. - # FIXME Still not working perfect for Mr. Internet Exploder - # FIXME Firefox struggles with very long codes (> 200 lines) - line_numbers = (start ... start + line_count).to_a.map(&bolding).join("\n") - line_numbers << "\n" # also for Mr. MS Internet Exploder :-/ - line_numbers.gsub!(/\n/) { "\n" } - - line_numbers_table_tpl = TABLE.apply('LINE_NUMBERS', line_numbers) - gsub!(/\n/) { "\n" } - wrap_in! line_numbers_table_tpl - @wrapped_in = :div - - when :list - opened_tags = [] - gsub!(/^.*$\n?/) do |line| - line.chomp! - - open = opened_tags.join - line.scan(%r!<(/)?span[^>]*>?!) do |close,| - if close - opened_tags.pop - else - opened_tags << $& - end - end - close = '' * opened_tags.size - - "
  • #{open}#{line}#{close}
  • " - end - wrap_in! LIST - @wrapped_in = :div - - else - raise ArgumentError, 'Unknown value %p for mode: expected one of %p' % - [mode, [:table, :list, :inline]] - end - - self - end - - def line_count - line_count = count("\n") - position_of_last_newline = rindex(?\n) - if position_of_last_newline - after_last_newline = self[position_of_last_newline + 1 .. -1] - ends_with_newline = after_last_newline[/\A(?:<\/span>)*\z/] - line_count += 1 if not ends_with_newline - end - line_count - end - - end - - end + class HTML + + module Output + + def numerize *args + clone.numerize!(*args) + end + +=begin NUMERIZABLE_WRAPPINGS = { + :table => [:div, :page, nil], + :inline => :all, + :list => [:div, :page, nil] + } + NUMERIZABLE_WRAPPINGS.default = :all +=end + def numerize! mode = :table, options = {} + return self unless mode + + options = DEFAULT_OPTIONS.merge options + + start = options[:line_number_start] + unless start.is_a? Integer + raise ArgumentError, "Invalid value %p for :line_number_start; Integer expected." % start + end + + #allowed_wrappings = NUMERIZABLE_WRAPPINGS[mode] + #unless allowed_wrappings == :all or allowed_wrappings.include? options[:wrap] + # raise ArgumentError, "Can't numerize, :wrap must be in %p, but is %p" % [NUMERIZABLE_WRAPPINGS, options[:wrap]] + #end + + bold_every = options[:bold_every] + bolding = + if bold_every == false + proc { |line| line.to_s } + elsif bold_every.is_a? Integer + raise ArgumentError, ":bolding can't be 0." if bold_every == 0 + proc do |line| + if line % bold_every == 0 + "#{line}" # every bold_every-th number in bold + else + line.to_s + end + end + else + raise ArgumentError, 'Invalid value %p for :bolding; false or Integer expected.' % bold_every + end + + case mode + when :inline + max_width = (start + line_count).to_s.size + line = start + gsub!(/^/) do + line_number = bolding.call line + indent = ' ' * (max_width - line.to_s.size) + res = "#{indent}#{line_number} " + line += 1 + res + end + + when :table + # This is really ugly. + # Because even monospace fonts seem to have different heights when bold, + # I make the newline bold, both in the code and the line numbers. + # FIXME Still not working perfect for Mr. Internet Exploder + # FIXME Firefox struggles with very long codes (> 200 lines) + line_numbers = (start ... start + line_count).to_a.map(&bolding).join("\n") + line_numbers << "\n" # also for Mr. MS Internet Exploder :-/ + line_numbers.gsub!(/\n/) { "\n" } + + line_numbers_table_tpl = TABLE.apply('LINE_NUMBERS', line_numbers) + gsub!(/\n/) { "\n" } + wrap_in! line_numbers_table_tpl + @wrapped_in = :div + + when :list + opened_tags = [] + gsub!(/^.*$\n?/) do |line| + line.chomp! + + open = opened_tags.join + line.scan(%r!<(/)?span[^>]*>?!) do |close,| + if close + opened_tags.pop + else + opened_tags << $& + end + end + close = '' * opened_tags.size + + "
  • #{open}#{line}#{close}
  • " + end + wrap_in! LIST + @wrapped_in = :div + + else + raise ArgumentError, 'Unknown value %p for mode: expected one of %p' % + [mode, [:table, :list, :inline]] + end + + self + end + + def line_count + line_count = count("\n") + position_of_last_newline = rindex(?\n) + if position_of_last_newline + after_last_newline = self[position_of_last_newline + 1 .. -1] + ends_with_newline = after_last_newline[/\A(?:<\/span>)*\z/] + line_count += 1 if not ends_with_newline + end + line_count + end + + end + + end end end diff --git a/lib/coderay/encoders/html/output.rb b/lib/coderay/encoders/html/output.rb index 203caca..61258ee 100644 --- a/lib/coderay/encoders/html/output.rb +++ b/lib/coderay/encoders/html/output.rb @@ -1,195 +1,195 @@ module CodeRay module Encoders - class HTML - - # This module is included in the output String from thew HTML Encoder. - # - # It provides methods like wrap, div, page etc. - # - # Remember to use #clone instead of #dup to keep the modules the object was - # extended with. - # - # TODO: more doc. - module Output - - require 'coderay/encoders/html/numerization.rb' - - attr_accessor :css - - class << self - - # This makes Output look like a class. - # - # Example: - # - # a = Output.new 'Code' - # a.wrap! :page - def new string, css = CSS.new, element = nil - output = string.clone.extend self - output.wrapped_in = element - output.css = css - output - end - - # Raises an exception if an object that doesn't respond to to_str is extended by Output, - # to prevent users from misuse. Use Module#remove_method to disable. - def extended o - warn "The Output module is intended to extend instances of String, not #{o.class}." unless o.respond_to? :to_str - end - - def make_stylesheet css, in_tag = false - sheet = css.stylesheet - sheet = <<-CSS if in_tag + class HTML + + # This module is included in the output String from thew HTML Encoder. + # + # It provides methods like wrap, div, page etc. + # + # Remember to use #clone instead of #dup to keep the modules the object was + # extended with. + # + # TODO: more doc. + module Output + + require 'coderay/encoders/html/numerization.rb' + + attr_accessor :css + + class << self + + # This makes Output look like a class. + # + # Example: + # + # a = Output.new 'Code' + # a.wrap! :page + def new string, css = CSS.new, element = nil + output = string.clone.extend self + output.wrapped_in = element + output.css = css + output + end + + # Raises an exception if an object that doesn't respond to to_str is extended by Output, + # to prevent users from misuse. Use Module#remove_method to disable. + def extended o + warn "The Output module is intended to extend instances of String, not #{o.class}." unless o.respond_to? :to_str + end + + def make_stylesheet css, in_tag = false + sheet = css.stylesheet + sheet = <<-CSS if in_tag - CSS - sheet - end - - def page_template_for_css css - sheet = make_stylesheet css - PAGE.apply 'CSS', sheet - end - - # Define a new wrapper. This is meta programming. - def wrapper *wrappers - wrappers.each do |wrapper| - define_method wrapper do |*args| - wrap wrapper, *args - end - define_method "#{wrapper}!".to_sym do |*args| - wrap! wrapper, *args - end - end - end - - end - - wrapper :div, :span, :page - - def wrapped_in? element - wrapped_in == element - end - - def wrapped_in - @wrapped_in ||= nil - end - attr_writer :wrapped_in - - def wrap_in template - clone.wrap_in! template - end - - def wrap_in! template - Template.wrap! self, template, 'CONTENT' - self - end - - def wrap! element, *args - return self if not element or element == wrapped_in - case element - when :div - raise "Can't wrap %p in %p" % [wrapped_in, element] unless wrapped_in? nil - wrap_in! DIV - when :span - raise "Can't wrap %p in %p" % [wrapped_in, element] unless wrapped_in? nil - wrap_in! SPAN - when :page - wrap! :div if wrapped_in? nil - raise "Can't wrap %p in %p" % [wrapped_in, element] unless wrapped_in? :div - wrap_in! Output.page_template_for_css(@css) - when nil - return self - else - raise "Unknown value %p for :wrap" % element - end - @wrapped_in = element - self - end - - def wrap *args - clone.wrap!(*args) - end - - def stylesheet in_tag = false - Output.make_stylesheet @css, in_tag - end - - class Template < String - - def self.wrap! str, template, target - target = Regexp.new(Regexp.escape("<%#{target}%>")) - if template =~ target - str[0,0] = $` - str << $' - else - raise "Template target <%%%p%%> not found" % target - end - end - - def apply target, replacement - target = Regexp.new(Regexp.escape("<%#{target}%>")) - if self =~ target - Template.new($` + replacement + $') - else - raise "Template target <%%%p%%> not found" % target - end - end - - module Simple - def ` str #` - Template.new str - end - end - end - - extend Template::Simple + CSS + sheet + end + + def page_template_for_css css + sheet = make_stylesheet css + PAGE.apply 'CSS', sheet + end + + # Define a new wrapper. This is meta programming. + def wrapper *wrappers + wrappers.each do |wrapper| + define_method wrapper do |*args| + wrap wrapper, *args + end + define_method "#{wrapper}!".to_sym do |*args| + wrap! wrapper, *args + end + end + end + + end + + wrapper :div, :span, :page + + def wrapped_in? element + wrapped_in == element + end + + def wrapped_in + @wrapped_in ||= nil + end + attr_writer :wrapped_in + + def wrap_in template + clone.wrap_in! template + end + + def wrap_in! template + Template.wrap! self, template, 'CONTENT' + self + end + + def wrap! element, *args + return self if not element or element == wrapped_in + case element + when :div + raise "Can't wrap %p in %p" % [wrapped_in, element] unless wrapped_in? nil + wrap_in! DIV + when :span + raise "Can't wrap %p in %p" % [wrapped_in, element] unless wrapped_in? nil + wrap_in! SPAN + when :page + wrap! :div if wrapped_in? nil + raise "Can't wrap %p in %p" % [wrapped_in, element] unless wrapped_in? :div + wrap_in! Output.page_template_for_css(@css) + when nil + return self + else + raise "Unknown value %p for :wrap" % element + end + @wrapped_in = element + self + end + + def wrap *args + clone.wrap!(*args) + end + + def stylesheet in_tag = false + Output.make_stylesheet @css, in_tag + end + + class Template < String + + def self.wrap! str, template, target + target = Regexp.new(Regexp.escape("<%#{target}%>")) + if template =~ target + str[0,0] = $` + str << $' + else + raise "Template target <%%%p%%> not found" % target + end + end + + def apply target, replacement + target = Regexp.new(Regexp.escape("<%#{target}%>")) + if self =~ target + Template.new($` + replacement + $') + else + raise "Template target <%%%p%%> not found" % target + end + end + + module Simple + def ` str #` <-- for stupid editors + Template.new str + end + end + end + + extend Template::Simple #-- don't include the templates in docu - - SPAN = `<%CONTENT%>` - DIV = <<-`DIV` + SPAN = `<%CONTENT%>` + + DIV = <<-`DIV`
    -
    <%CONTENT%>
    +
    <%CONTENT%>
    - DIV + DIV - TABLE = <<-`TABLE` + TABLE = <<-`TABLE` - - + +
    <%LINE_NUMBERS%>
    <%CONTENT%>
    <%LINE_NUMBERS%>
    <%CONTENT%>
    - TABLE - # title="double click to expand" + TABLE + # title="double click to expand" - LIST = <<-`LIST` + LIST = <<-`LIST`
      <%CONTENT%>
    - LIST + LIST - PAGE = <<-`PAGE` + PAGE = <<-`PAGE` + "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> - - CodeRay HTML Encoder Example - + <%CONTENT%> - PAGE + PAGE - end + end - end + end end end diff --git a/lib/coderay/encoders/null.rb b/lib/coderay/encoders/null.rb index 0e3d073..96d81fe 100644 --- a/lib/coderay/encoders/null.rb +++ b/lib/coderay/encoders/null.rb @@ -1,26 +1,26 @@ module CodeRay module Encoders - # = Null Encoder - # - # Does nothing and returns an empty string. - class Null < Encoder + # = Null Encoder + # + # Does nothing and returns an empty string. + class Null < Encoder - include Streamable - register_for :null + include Streamable + register_for :null - # Defined for faster processing - def to_proc - proc {} - end + # Defined for faster processing + def to_proc + proc {} + end - protected + protected - def token(*) - # do nothing - end + def token(*) + # do nothing + end - end + end end end diff --git a/lib/coderay/encoders/page.rb b/lib/coderay/encoders/page.rb index 74bdc55..1ed7985 100644 --- a/lib/coderay/encoders/page.rb +++ b/lib/coderay/encoders/page.rb @@ -1,21 +1,21 @@ module CodeRay module Encoders - - load :html - class Page < HTML + load :html - FILE_EXTENSION = 'html' + class Page < HTML - register_for :page + FILE_EXTENSION = 'html' - DEFAULT_OPTIONS = HTML::DEFAULT_OPTIONS.merge({ - :css => :class, - :wrap => :page, - :line_numbers => :table - }) + register_for :page - end + DEFAULT_OPTIONS = HTML::DEFAULT_OPTIONS.merge({ + :css => :class, + :wrap => :page, + :line_numbers => :table + }) + + end end end diff --git a/lib/coderay/encoders/span.rb b/lib/coderay/encoders/span.rb index 4d74277..e892cb2 100644 --- a/lib/coderay/encoders/span.rb +++ b/lib/coderay/encoders/span.rb @@ -1,20 +1,20 @@ module CodeRay module Encoders - load :html + load :html - class Span < HTML + class Span < HTML - FILE_EXTENSION = 'span.html' + FILE_EXTENSION = 'span.html' - register_for :span + register_for :span - DEFAULT_OPTIONS = HTML::DEFAULT_OPTIONS.merge({ - :css => :style, - :wrap => :span, - }) + DEFAULT_OPTIONS = HTML::DEFAULT_OPTIONS.merge({ + :css => :style, + :wrap => :span, + }) - end + end end end diff --git a/lib/coderay/encoders/statistic.rb b/lib/coderay/encoders/statistic.rb index 0cf8831..f80d5c8 100644 --- a/lib/coderay/encoders/statistic.rb +++ b/lib/coderay/encoders/statistic.rb @@ -1,47 +1,47 @@ module CodeRay module Encoders - # Makes a statistic for the given tokens. - class Statistic < Encoder - - include Streamable - register_for :stats, :statistic - - attr_reader :type_stats, :real_token_count - - protected - - TypeStats = Struct.new :count, :size - - def setup options - @type_stats = Hash.new { |h, k| h[k] = TypeStats.new 0, 0 } - @real_token_count = 0 - end - - def generate tokens, options - @tokens = tokens - super - end - - def text_token text, kind - @real_token_count += 1 unless kind == :space - @type_stats[kind].count += 1 - @type_stats[kind].size += text.size - @type_stats['TOTAL'].size += text.size - end - - # TODO Hierarchy handling - def block_token action, kind - #@content_type = kind - @type_stats['open/close'].count += 1 - end - - def token text, kind - super - @type_stats['TOTAL'].count += 1 - end - - STATS = <<-STATS + # Makes a statistic for the given tokens. + class Statistic < Encoder + + include Streamable + register_for :stats, :statistic + + attr_reader :type_stats, :real_token_count + + protected + + TypeStats = Struct.new :count, :size + + def setup options + @type_stats = Hash.new { |h, k| h[k] = TypeStats.new 0, 0 } + @real_token_count = 0 + end + + def generate tokens, options + @tokens = tokens + super + end + + def text_token text, kind + @real_token_count += 1 unless kind == :space + @type_stats[kind].count += 1 + @type_stats[kind].size += text.size + @type_stats['TOTAL'].size += text.size + end + + # TODO Hierarchy handling + def block_token action, kind + #@content_type = kind + @type_stats['open/close'].count += 1 + end + + def token text, kind + super + @type_stats['TOTAL'].count += 1 + end + + STATS = <<-STATS Code Statistics @@ -53,29 +53,29 @@ Token Types (%d): type count ratio size (average) ------------------------------------------------------------- %s - STATS + STATS # space 12007 33.81 % 1.7 - TOKEN_TYPES_ROW = <<-TKR + TOKEN_TYPES_ROW = <<-TKR %-20s %8d %6.2f %% %5.1f - TKR - - def finish options - all = @type_stats['TOTAL'] - all_count, all_size = all.count, all.size - @type_stats.each do |type, stat| - stat.size /= stat.count.to_f - end - types_stats = @type_stats.sort_by { |k, v| [-v.count, k.to_s] }.map do |k, v| - TOKEN_TYPES_ROW % [k, v.count, 100.0 * v.count / all_count, v.size] - end.join - STATS % [ - all_count, @real_token_count, all_size, - @type_stats.delete_if { |k, v| k.is_a? String }.size, - types_stats - ] - end - - end + TKR + + def finish options + all = @type_stats['TOTAL'] + all_count, all_size = all.count, all.size + @type_stats.each do |type, stat| + stat.size /= stat.count.to_f + end + types_stats = @type_stats.sort_by { |k, v| [-v.count, k.to_s] }.map do |k, v| + TOKEN_TYPES_ROW % [k, v.count, 100.0 * v.count / all_count, v.size] + end.join + STATS % [ + all_count, @real_token_count, all_size, + @type_stats.delete_if { |k, v| k.is_a? String }.size, + types_stats + ] + end + + end end end diff --git a/lib/coderay/encoders/text.rb b/lib/coderay/encoders/text.rb index 02f76cb..31661ef 100644 --- a/lib/coderay/encoders/text.rb +++ b/lib/coderay/encoders/text.rb @@ -1,33 +1,33 @@ module CodeRay module Encoders - class Text < Encoder + class Text < Encoder - include Streamable - register_for :text + include Streamable + register_for :text - FILE_EXTENSION = 'txt' + FILE_EXTENSION = 'txt' - DEFAULT_OPTIONS = { - :separator => '' - } + DEFAULT_OPTIONS = { + :separator => '' + } - protected - def setup options - super - @sep = options[:separator] - end + protected + def setup options + super + @sep = options[:separator] + end - def token text, kind - return unless text.respond_to? :to_str - @out << text + @sep - end + def token text, kind + return unless text.respond_to? :to_str + @out << text + @sep + end - def finish options - @out.chomp @sep - end + def finish options + @out.chomp @sep + end - end + end end end diff --git a/lib/coderay/encoders/tokens.rb b/lib/coderay/encoders/tokens.rb index 2bcca40..743cc0e 100644 --- a/lib/coderay/encoders/tokens.rb +++ b/lib/coderay/encoders/tokens.rb @@ -1,44 +1,44 @@ module CodeRay module Encoders - # The Tokens encoder converts the tokens to a simple - # readable format. It doesn't use colors and is mainly - # intended for console output. - # - # The tokens are converted with Tokens.write_token. - # - # The format is: - # - # \t \n - # - # Example: - # - # require 'coderay' - # puts CodeRay.scan("puts 3 + 4", :ruby).tokens - # - # prints: - # - # ident puts - # space - # integer 3 - # space - # operator + - # space - # integer 4 - # - class Tokens < Encoder + # The Tokens encoder converts the tokens to a simple + # readable format. It doesn't use colors and is mainly + # intended for console output. + # + # The tokens are converted with Tokens.write_token. + # + # The format is: + # + # \t \n + # + # Example: + # + # require 'coderay' + # puts CodeRay.scan("puts 3 + 4", :ruby).tokens + # + # prints: + # + # ident puts + # space + # integer 3 + # space + # operator + + # space + # integer 4 + # + class Tokens < Encoder - include Streamable - register_for :tokens + include Streamable + register_for :tokens - FILE_EXTENSION = 'tok' + FILE_EXTENSION = 'tok' - protected - def token *args - @out << CodeRay::Tokens.write_token(*args) - end + protected + def token *args + @out << CodeRay::Tokens.write_token(*args) + end - end + end end end diff --git a/lib/coderay/encoders/xml.rb b/lib/coderay/encoders/xml.rb index 246fe0a..21ef0cf 100644 --- a/lib/coderay/encoders/xml.rb +++ b/lib/coderay/encoders/xml.rb @@ -1,71 +1,71 @@ module CodeRay module Encoders - # = XML Encoder - # - # Uses REXML. Very slow. - class XML < Encoder + # = XML Encoder + # + # Uses REXML. Very slow. + class XML < Encoder - include Streamable - register_for :xml + include Streamable + register_for :xml - FILE_EXTENSION = 'xml' + FILE_EXTENSION = 'xml' - require 'rexml/document' + require 'rexml/document' - DEFAULT_OPTIONS = { - :tab_width => 8, - :pretty => -1, - :transitive => false, - } + DEFAULT_OPTIONS = { + :tab_width => 8, + :pretty => -1, + :transitive => false, + } - protected - - def setup options - @out = '' - @doc = REXML::Document.new - @doc << REXML::XMLDecl.new - @tab_width = options[:tab_width] - @root = @node = @doc.add_element('coderay-tokens') - end - - def finish options - @doc.write @out, options[:pretty], options[:transitive], true - @out - end - - def text_token text, kind - if kind == :space - token = @node - else - token = @node.add_element kind.to_s - end - text.scan(/(\x20+)|(\t+)|(\n)|[^\x20\t\n]+/) do |space, tab, nl| - case - when space - token << REXML::Text.new(space, true) - when tab - token << REXML::Text.new(tab, true) - when nl - token << REXML::Text.new(nl, true) - else - token << REXML::Text.new($&) - end - end - end + protected - def open_token kind - @node = @node.add_element kind.to_s - end + def setup options + @out = '' + @doc = REXML::Document.new + @doc << REXML::XMLDecl.new + @tab_width = options[:tab_width] + @root = @node = @doc.add_element('coderay-tokens') + end - def close_token kind - if @node == @root - raise 'no token to close!' - end - @node = @node.parent - end + def finish options + @doc.write @out, options[:pretty], options[:transitive], true + @out + end - end + def text_token text, kind + if kind == :space + token = @node + else + token = @node.add_element kind.to_s + end + text.scan(/(\x20+)|(\t+)|(\n)|[^\x20\t\n]+/) do |space, tab, nl| + case + when space + token << REXML::Text.new(space, true) + when tab + token << REXML::Text.new(tab, true) + when nl + token << REXML::Text.new(nl, true) + else + token << REXML::Text.new($&) + end + end + end + + def open_token kind + @node = @node.add_element kind.to_s + end + + def close_token kind + if @node == @root + raise 'no token to close!' + end + @node = @node.parent + end + + end end end diff --git a/lib/coderay/encoders/yaml.rb b/lib/coderay/encoders/yaml.rb index b842647..47f64a4 100644 --- a/lib/coderay/encoders/yaml.rb +++ b/lib/coderay/encoders/yaml.rb @@ -1,22 +1,22 @@ module CodeRay module Encoders - # = YAML Encoder - # - # Slow. - class YAML < Encoder + # = YAML Encoder + # + # Slow. + class YAML < Encoder - register_for :yaml + register_for :yaml - FILE_EXTENSION = 'yaml' + FILE_EXTENSION = 'yaml' - protected - def compile tokens, options - require 'yaml' - @out = tokens.to_a.to_yaml - end + protected + def compile tokens, options + require 'yaml' + @out = tokens.to_a.to_yaml + end - end + end end end diff --git a/lib/coderay/helpers/filetype.rb b/lib/coderay/helpers/filetype.rb index bc8ccf2..7a9c489 100644 --- a/lib/coderay/helpers/filetype.rb +++ b/lib/coderay/helpers/filetype.rb @@ -9,99 +9,98 @@ # == Documentation # # # determine the type of the given -# lang = FileType[ARGV.first] +# lang = FileType[ARGV.first] # -# # return :plaintext if the file type is unknown -# lang = FileType.fetch ARGV.first, :plaintext +# # return :plaintext if the file type is unknown +# lang = FileType.fetch ARGV.first, :plaintext # -# # try the shebang line, too -# lang = FileType.fetch ARGV.first, :plaintext, true -# +# # try the shebang line, too +# lang = FileType.fetch ARGV.first, :plaintext, true module FileType - - UnknownFileType = Class.new Exception - - class << self - - # Try to determine the file type of the file. - # - # +filename+ is a relative or absolute path to a file. - # - # The file itself is only accessed when +read_shebang+ is set to true. - # That means you can get filetypes from files that don't exist. - def [] filename, read_shebang = false - name = File.basename filename - ext = File.extname name - ext.sub!(/^\./, '') # delete the leading dot - - type = - TypeFromExt[ext] || - TypeFromExt[ext.downcase] || - TypeFromName[name] || - TypeFromName[name.downcase] - type ||= shebang(filename) if read_shebang - - type - end - - def shebang filename - begin - File.open filename, 'r' do |f| - first_line = f.gets - first_line[TypeFromShebang] - end - rescue IOError - nil - end - end - - # This works like Hash#fetch. - # - # If the filetype cannot be found, the +default+ value - # is returned. - def fetch filename, default = nil, read_shebang = false - if default and block_given? - warn 'block supersedes default value argument' - end - - unless type = self[filename, read_shebang] - return yield if block_given? - return default if default - raise UnknownFileType, 'Could not determine type of %p.' % filename - end - type - end - - end - - TypeFromExt = { - 'rb' => :ruby, - 'rbw' => :ruby, - 'rake' => :ruby, - 'cpp' => :c, - 'c' => :c, - 'h' => :c, - 'xml' => :xml, - 'htm' => :html, - 'html' => :html, - 'xhtml' => :xhtml, - 'rhtml' => :rhtml, - 'yaml' => :yaml, - 'yml' => :yaml, - } - - TypeFromShebang = /\b(?:ruby|perl|python|sh)\b/ - - TypeFromName = { - 'Rakefile' => :ruby, - 'Rantfile' => :ruby, - } + + UnknownFileType = Class.new Exception + + class << self + + # Try to determine the file type of the file. + # + # +filename+ is a relative or absolute path to a file. + # + # The file itself is only accessed when +read_shebang+ is set to true. + # That means you can get filetypes from files that don't exist. + def [] filename, read_shebang = false + name = File.basename filename + ext = File.extname name + ext.sub!(/^\./, '') # delete the leading dot + + type = + TypeFromExt[ext] || + TypeFromExt[ext.downcase] || + TypeFromName[name] || + TypeFromName[name.downcase] + type ||= shebang(filename) if read_shebang + + type + end + + def shebang filename + begin + File.open filename, 'r' do |f| + first_line = f.gets + first_line[TypeFromShebang] + end + rescue IOError + nil + end + end + + # This works like Hash#fetch. + # + # If the filetype cannot be found, the +default+ value + # is returned. + def fetch filename, default = nil, read_shebang = false + if default and block_given? + warn 'block supersedes default value argument' + end + + unless type = self[filename, read_shebang] + return yield if block_given? + return default if default + raise UnknownFileType, 'Could not determine type of %p.' % filename + end + type + end + + end + + TypeFromExt = { + 'rb' => :ruby, + 'rbw' => :ruby, + 'rake' => :ruby, + 'cpp' => :c, + 'c' => :c, + 'h' => :c, + 'xml' => :xml, + 'htm' => :html, + 'html' => :html, + 'xhtml' => :xhtml, + 'rhtml' => :rhtml, + 'yaml' => :yaml, + 'yml' => :yaml, + } + + TypeFromShebang = /\b(?:ruby|perl|python|sh)\b/ + + TypeFromName = { + 'Rakefile' => :ruby, + 'Rantfile' => :ruby, + } end if $0 == __FILE__ - $VERBOSE = true - eval DATA.read, nil, $0, __LINE__+4 + $VERBOSE = true + eval DATA.read, nil, $0, __LINE__+4 end __END__ @@ -110,72 +109,72 @@ require 'test/unit' class TC_FileType < Test::Unit::TestCase - def test_fetch - assert_raise FileType::UnknownFileType do - FileType.fetch '' - end - - assert_throws :not_found do - FileType.fetch '.' do - throw :not_found - end - end - - assert_equal :default, FileType.fetch('c', :default) - - stderr, fake_stderr = $stderr, Object.new - $err = '' - def fake_stderr.write x - $err << x - end - $stderr = fake_stderr - FileType.fetch('c', :default) { } - assert_equal "block supersedes default value argument\n", $err - $stderr = stderr - end - - def test_ruby - assert_equal :ruby, FileType['test.rb'] - assert_equal :ruby, FileType['C:\\Program Files\\x\\y\\c\\test.rbw'] - assert_equal :ruby, FileType['/usr/bin/something/Rakefile'] - assert_equal :ruby, FileType['~/myapp/gem/Rantfile'] - assert_equal :ruby, FileType['./lib/tasks\repository.rake'] - assert_not_equal :ruby, FileType['test_rb'] - assert_not_equal :ruby, FileType['Makefile'] - assert_not_equal :ruby, FileType['set.rb/set'] - assert_not_equal :ruby, FileType['~/projects/blabla/rb'] - end - - def test_c - assert_equal :c, FileType['test.c'] - assert_equal :c, FileType['C:\\Program Files\\x\\y\\c\\test.h'] - assert_not_equal :c, FileType['test_c'] - assert_not_equal :c, FileType['Makefile'] - assert_not_equal :c, FileType['set.h/set'] - assert_not_equal :c, FileType['~/projects/blabla/c'] - end - - def test_html - assert_equal :html, FileType['test.htm'] - assert_equal :xhtml, FileType['test.xhtml'] - assert_equal :xhtml, FileType['test.html.xhtml'] - assert_equal :rhtml, FileType['_form.rhtml'] - end - - def test_yaml - assert_equal :yaml, FileType['test.yml'] - assert_equal :yaml, FileType['test.yaml'] - assert_equal :yaml, FileType['my.html.yaml'] - assert_not_equal :yaml, FileType['YAML'] - end - - def test_shebang - dir = './test' - if File.directory? dir - Dir.chdir dir do - assert_equal :c, FileType['test.c'] - end - end - end + def test_fetch + assert_raise FileType::UnknownFileType do + FileType.fetch '' + end + + assert_throws :not_found do + FileType.fetch '.' do + throw :not_found + end + end + + assert_equal :default, FileType.fetch('c', :default) + + stderr, fake_stderr = $stderr, Object.new + $err = '' + def fake_stderr.write x + $err << x + end + $stderr = fake_stderr + FileType.fetch('c', :default) { } + assert_equal "block supersedes default value argument\n", $err + $stderr = stderr + end + + def test_ruby + assert_equal :ruby, FileType['test.rb'] + assert_equal :ruby, FileType['C:\\Program Files\\x\\y\\c\\test.rbw'] + assert_equal :ruby, FileType['/usr/bin/something/Rakefile'] + assert_equal :ruby, FileType['~/myapp/gem/Rantfile'] + assert_equal :ruby, FileType['./lib/tasks\repository.rake'] + assert_not_equal :ruby, FileType['test_rb'] + assert_not_equal :ruby, FileType['Makefile'] + assert_not_equal :ruby, FileType['set.rb/set'] + assert_not_equal :ruby, FileType['~/projects/blabla/rb'] + end + + def test_c + assert_equal :c, FileType['test.c'] + assert_equal :c, FileType['C:\\Program Files\\x\\y\\c\\test.h'] + assert_not_equal :c, FileType['test_c'] + assert_not_equal :c, FileType['Makefile'] + assert_not_equal :c, FileType['set.h/set'] + assert_not_equal :c, FileType['~/projects/blabla/c'] + end + + def test_html + assert_equal :html, FileType['test.htm'] + assert_equal :xhtml, FileType['test.xhtml'] + assert_equal :xhtml, FileType['test.html.xhtml'] + assert_equal :rhtml, FileType['_form.rhtml'] + end + + def test_yaml + assert_equal :yaml, FileType['test.yml'] + assert_equal :yaml, FileType['test.yaml'] + assert_equal :yaml, FileType['my.html.yaml'] + assert_not_equal :yaml, FileType['YAML'] + end + + def test_shebang + dir = './test' + if File.directory? dir + Dir.chdir dir do + assert_equal :c, FileType['test.c'] + end + end + end end diff --git a/lib/coderay/helpers/gzip_simple.rb b/lib/coderay/helpers/gzip_simple.rb index 357ce6b..28e7f1e 100644 --- a/lib/coderay/helpers/gzip_simple.rb +++ b/lib/coderay/helpers/gzip_simple.rb @@ -11,40 +11,39 @@ # See +GZip+ module and the +String+ extensions. # module GZip - - require 'zlib' - # The default zipping level. 7 zips good and fast. - DEFAULT_GZIP_LEVEL = 7 - - # Unzips the given string +s+. - # - # Example: - # require 'gzip_simple' - # print GZip.gunzip(File.read('adresses.gz')) - # - def GZip.gunzip s - Zlib::Inflate.inflate s - end - - # Zips the given string +s+. - # - # Example: - # require 'gzip_simple' - # File.open('adresses.gz', 'w') do |file - # file.write GZip.gzip('Mum: 0123 456 789', 9) - # end - # - # If you provide a +level+, you can control how strong - # the string is compressed: - # - 0: no compression, only convert to gzip format - # - 1: compress fast - # - 7: compress more, but still fast (default) - # - 8: compress more, slower - # - 9: compress best, very slow - def GZip.gzip s, level = DEFAULT_GZIP_LEVEL - Zlib::Deflate.new(level).deflate s, Zlib::FINISH - end + require 'zlib' + + # The default zipping level. 7 zips good and fast. + DEFAULT_GZIP_LEVEL = 7 + + # Unzips the given string +s+. + # + # Example: + # require 'gzip_simple' + # print GZip.gunzip(File.read('adresses.gz')) + def GZip.gunzip s + Zlib::Inflate.inflate s + end + + # Zips the given string +s+. + # + # Example: + # require 'gzip_simple' + # File.open('adresses.gz', 'w') do |file + # file.write GZip.gzip('Mum: 0123 456 789', 9) + # end + # + # If you provide a +level+, you can control how strong + # the string is compressed: + # - 0: no compression, only convert to gzip format + # - 1: compress fast + # - 7: compress more, but still fast (default) + # - 8: compress more, slower + # - 9: compress best, very slow + def GZip.gzip s, level = DEFAULT_GZIP_LEVEL + Zlib::Deflate.new(level).deflate s, Zlib::FINISH + end end # String extensions to use the GZip module. @@ -65,31 +64,31 @@ end # # unzipping works # p x_gz.gunzip == x #-> true class String - # Returns the string, unzipped. - # See GZip.gunzip - def gunzip - GZip.gunzip self - end - # Replaces the string with its unzipped value. - # See GZip.gunzip - def gunzip! - replace gunzip - end - - # Returns the string, zipped. - # +level+ is the gzip compression level, see GZip.gzip. - def gzip level = GZip::DEFAULT_GZIP_LEVEL - GZip.gzip self, level - end - # Replaces the string with its zipped value. - # See GZip.gzip. - def gzip!(*args) - replace gzip(*args) - end + # Returns the string, unzipped. + # See GZip.gunzip + def gunzip + GZip.gunzip self + end + # Replaces the string with its unzipped value. + # See GZip.gunzip + def gunzip! + replace gunzip + end + + # Returns the string, zipped. + # +level+ is the gzip compression level, see GZip.gzip. + def gzip level = GZip::DEFAULT_GZIP_LEVEL + GZip.gzip self, level + end + # Replaces the string with its zipped value. + # See GZip.gzip. + def gzip!(*args) + replace gzip(*args) + end end if $0 == __FILE__ - eval DATA.read, nil, $0, __LINE__+4 + eval DATA.read, nil, $0, __LINE__+4 end __END__ @@ -107,17 +106,17 @@ INFO = 'packed to %0.3f%%' # :nodoc: x = Array.new(100000) { rand(255).chr + 'aaaaaaaaa' + rand(255).chr }.join Benchmark.bm(10) do |bm| - for level in 0..9 - bm.report "zip #{level}" do - $x = x.gzip level - end - puts INFO % [100.0 * $x.size / x.size] - end - bm.report 'zip' do - $x = x.gzip - end - puts INFO % [100.0 * $x.size / x.size] - bm.report 'unzip' do - $x.gunzip - end + for level in 0..9 + bm.report "zip #{level}" do + $x = x.gzip level + end + puts INFO % [100.0 * $x.size / x.size] + end + bm.report 'zip' do + $x = x.gzip + end + puts INFO % [100.0 * $x.size / x.size] + bm.report 'unzip' do + $x.gunzip + end end diff --git a/lib/coderay/helpers/plugin.rb b/lib/coderay/helpers/plugin.rb index 3383828..7e90279 100644 --- a/lib/coderay/helpers/plugin.rb +++ b/lib/coderay/helpers/plugin.rb @@ -3,310 +3,310 @@ # $Id$ # # A simple subclass plugin system. -# -# Example: -# class Generators < PluginHost -# plugin_path 'app/generators' -# end -# -# class Generator -# extend Plugin -# PLUGIN_HOST = Generators -# end -# -# class FancyGenerator < Generator -# register_for :fancy -# end # -# Generators[:fancy] #-> FancyGenerator -# # or -# require_plugin 'Generators/fancy' +# Example: +# class Generators < PluginHost +# plugin_path 'app/generators' +# end +# +# class Generator +# extend Plugin +# PLUGIN_HOST = Generators +# end +# +# class FancyGenerator < Generator +# register_for :fancy +# end +# +# Generators[:fancy] #-> FancyGenerator +# # or +# require_plugin 'Generators/fancy' module PluginHost - # Raised if Encoders::[] fails because: - # * a file could not be found - # * the requested Encoder is not registered - PluginNotFound = Class.new Exception - HostNotFound = Class.new Exception - - PLUGIN_HOSTS = [] - PLUGIN_HOSTS_BY_ID = {} # dummy hash - - # Loads all plugins using all_plugin_names and load. - def load_all - for plugin in all_plugin_names - load plugin - end - end - - # Returns the Plugin for +id+. - # - # Example: - # yaml_plugin = MyPluginHost[:yaml] - def [] id, *args, &blk - plugin = validate_id(id) - begin - plugin = plugin_hash.[] plugin, *args, &blk - end while plugin.is_a? Symbol - plugin - end - - # Alias for +[]+. - alias load [] - - def require_helper plugin_id, helper_name - path = path_to File.join(plugin_id, helper_name) - require path - end - - class << self - - # Adds the module/class to the PLUGIN_HOSTS list. - def extended mod - PLUGIN_HOSTS << mod - end - - # Warns you that you should not #include this module. - def included mod - warn "#{name} should not be included. Use extend." - end - - # Find the PluginHost for host_id. - def host_by_id host_id - unless PLUGIN_HOSTS_BY_ID.default_proc - ph = Hash.new do |h, a_host_id| - for host in PLUGIN_HOSTS - h[host.host_id] = host - end - h.fetch a_host_id, nil - end - PLUGIN_HOSTS_BY_ID.replace ph - end - PLUGIN_HOSTS_BY_ID[host_id] - end - - end - - # The path where the plugins can be found. - def plugin_path *args - unless args.empty? - @plugin_path = File.expand_path File.join(*args) - load_map - end - @plugin_path - end - - # The host's ID. - # - # If PLUGIN_HOST_ID is not set, it is simply the class name. - def host_id - if self.const_defined? :PLUGIN_HOST_ID - self::PLUGIN_HOST_ID - else - name - end - end - - # Map a plugin_id to another. - # - # Usage: Put this in a file plugin_path/_map.rb. - # - # class MyColorHost < PluginHost - # map :navy => :dark_blue, - # :maroon => :brown, - # :luna => :moon - # end - def map hash - for from, to in hash - from = validate_id from - to = validate_id to - plugin_hash[from] = to unless plugin_hash.has_key? from - end - end - - # Define the default plugin to use when no plugin is found - # for a given id. - # - # See also map. - # - # class MyColorHost < PluginHost - # map :navy => :dark_blue - # default :gray - # end - def default id - id = validate_id id - plugin_hash[nil] = id - end - - # Every plugin must register itself for one or more - # +ids+ by calling register_for, which calls this method. - # - # See Plugin#register_for. - def register plugin, *ids - for id in ids - unless id.is_a? Symbol - raise ArgumentError, - "id must be a Symbol, but it was a #{id.class}" - end - plugin_hash[validate_id(id)] = plugin - end - end - - # A Hash of plugion_id => Plugin pairs. - def plugin_hash - @plugin_hash ||= create_plugin_hash - end - - # Returns an array of all .rb files in the plugin path. - # - # The extension .rb is not included. - def all_plugin_names - Dir[path_to('*')].select do |file| - File.basename(file)[/^(?!_)\w+\.rb$/] - end.map do |file| - File.basename file, '.rb' - end - end + # Raised if Encoders::[] fails because: + # * a file could not be found + # * the requested Encoder is not registered + PluginNotFound = Class.new Exception + HostNotFound = Class.new Exception + + PLUGIN_HOSTS = [] + PLUGIN_HOSTS_BY_ID = {} # dummy hash + + # Loads all plugins using all_plugin_names and load. + def load_all + for plugin in all_plugin_names + load plugin + end + end + + # Returns the Plugin for +id+. + # + # Example: + # yaml_plugin = MyPluginHost[:yaml] + def [] id, *args, &blk + plugin = validate_id(id) + begin + plugin = plugin_hash.[] plugin, *args, &blk + end while plugin.is_a? Symbol + plugin + end + + # Alias for +[]+. + alias load [] + + def require_helper plugin_id, helper_name + path = path_to File.join(plugin_id, helper_name) + require path + end + + class << self + + # Adds the module/class to the PLUGIN_HOSTS list. + def extended mod + PLUGIN_HOSTS << mod + end + + # Warns you that you should not #include this module. + def included mod + warn "#{name} should not be included. Use extend." + end + + # Find the PluginHost for host_id. + def host_by_id host_id + unless PLUGIN_HOSTS_BY_ID.default_proc + ph = Hash.new do |h, a_host_id| + for host in PLUGIN_HOSTS + h[host.host_id] = host + end + h.fetch a_host_id, nil + end + PLUGIN_HOSTS_BY_ID.replace ph + end + PLUGIN_HOSTS_BY_ID[host_id] + end + + end + + # The path where the plugins can be found. + def plugin_path *args + unless args.empty? + @plugin_path = File.expand_path File.join(*args) + load_map + end + @plugin_path + end + + # The host's ID. + # + # If PLUGIN_HOST_ID is not set, it is simply the class name. + def host_id + if self.const_defined? :PLUGIN_HOST_ID + self::PLUGIN_HOST_ID + else + name + end + end + + # Map a plugin_id to another. + # + # Usage: Put this in a file plugin_path/_map.rb. + # + # class MyColorHost < PluginHost + # map :navy => :dark_blue, + # :maroon => :brown, + # :luna => :moon + # end + def map hash + for from, to in hash + from = validate_id from + to = validate_id to + plugin_hash[from] = to unless plugin_hash.has_key? from + end + end + + # Define the default plugin to use when no plugin is found + # for a given id. + # + # See also map. + # + # class MyColorHost < PluginHost + # map :navy => :dark_blue + # default :gray + # end + def default id + id = validate_id id + plugin_hash[nil] = id + end + + # Every plugin must register itself for one or more + # +ids+ by calling register_for, which calls this method. + # + # See Plugin#register_for. + def register plugin, *ids + for id in ids + unless id.is_a? Symbol + raise ArgumentError, + "id must be a Symbol, but it was a #{id.class}" + end + plugin_hash[validate_id(id)] = plugin + end + end + + # A Hash of plugion_id => Plugin pairs. + def plugin_hash + @plugin_hash ||= create_plugin_hash + end + + # Returns an array of all .rb files in the plugin path. + # + # The extension .rb is not included. + def all_plugin_names + Dir[path_to('*')].select do |file| + File.basename(file)[/^(?!_)\w+\.rb$/] + end.map do |file| + File.basename file, '.rb' + end + end protected - # Created a new plugin list and stores it to @plugin_hash. - def create_plugin_hash - @plugin_hash = - Hash.new do |h, plugin_id| - id = validate_id(plugin_id) - path = path_to id - begin - require path - rescue LoadError => boom - if h.has_key? nil # default plugin - h[id] = h[nil] - else - raise PluginNotFound, 'Could not load plugin %p: %s' % [id, boom] - end - else - # Plugin should have registered by now - unless h.has_key? id - raise PluginNotFound, - "No #{self.name} plugin for #{id.inspect} found in #{path}." - end - end - h[id] - end - end - - # Makes a map of all loaded scanners. - def inspect - map = plugin_hash.dup - map.each do |id, plugin| - map[id] = plugin.name[/(?>[\w_]+)$/] - end - map.inspect - end - - # Loads the map file (see map). - # - # This is done automatically when plugin_path is called. - def load_map - mapfile = path_to '_map' - if File.exist? mapfile - require mapfile - elsif $DEBUG - warn 'no _map.rb found for %s' % name - end - end - - # Returns the Plugin for +id+. - # Use it like Hash#fetch. - # - # Example: - # yaml_plugin = MyPluginHost[:yaml, :default] - def fetch id, *args, &blk - plugin_hash.fetch validate_id(id), *args, &blk - end - - # Returns the expected path to the plugin file for the given id. - def path_to plugin_id - File.join plugin_path, "#{plugin_id}.rb" - end - - # Converts +id+ to a Symbol if it is a String, - # or returns +id+ if it already is a Symbol. - # - # Raises +ArgumentError+ for all other objects, or if the - # given String includes non-alphanumeric characters (\W). - def validate_id id - if id.is_a? Symbol or id.nil? - id - elsif id.is_a? String - if id[/\w+/] == id - id.to_sym - else - raise ArgumentError, "Invalid id: '#{id}' given." - end - else - raise ArgumentError, - "String or Symbol expected, but #{id.class} given." - end - end + # Created a new plugin list and stores it to @plugin_hash. + def create_plugin_hash + @plugin_hash = + Hash.new do |h, plugin_id| + id = validate_id(plugin_id) + path = path_to id + begin + require path + rescue LoadError => boom + if h.has_key? nil # default plugin + h[id] = h[nil] + else + raise PluginNotFound, 'Could not load plugin %p: %s' % [id, boom] + end + else + # Plugin should have registered by now + unless h.has_key? id + raise PluginNotFound, + "No #{self.name} plugin for #{id.inspect} found in #{path}." + end + end + h[id] + end + end + + # Makes a map of all loaded scanners. + def inspect + map = plugin_hash.dup + map.each do |id, plugin| + map[id] = plugin.name[/(?>[\w_]+)$/] + end + map.inspect + end + + # Loads the map file (see map). + # + # This is done automatically when plugin_path is called. + def load_map + mapfile = path_to '_map' + if File.exist? mapfile + require mapfile + elsif $DEBUG + warn 'no _map.rb found for %s' % name + end + end + + # Returns the Plugin for +id+. + # Use it like Hash#fetch. + # + # Example: + # yaml_plugin = MyPluginHost[:yaml, :default] + def fetch id, *args, &blk + plugin_hash.fetch validate_id(id), *args, &blk + end + + # Returns the expected path to the plugin file for the given id. + def path_to plugin_id + File.join plugin_path, "#{plugin_id}.rb" + end + + # Converts +id+ to a Symbol if it is a String, + # or returns +id+ if it already is a Symbol. + # + # Raises +ArgumentError+ for all other objects, or if the + # given String includes non-alphanumeric characters (\W). + def validate_id id + if id.is_a? Symbol or id.nil? + id + elsif id.is_a? String + if id[/\w+/] == id + id.to_sym + else + raise ArgumentError, "Invalid id: '#{id}' given." + end + else + raise ArgumentError, + "String or Symbol expected, but #{id.class} given." + end + end end # = Plugin -# -# Plugins have to include this module. # -# IMPORTANT: use extend for this module. +# Plugins have to include this module. +# +# IMPORTANT: use extend for this module. # -# Example: see PluginHost. +# Example: see PluginHost. module Plugin - def included mod - warn "#{name} should not be included. Use extend." - end - - # Register this class for the given langs. - # Example: - # class MyPlugin < PluginHost::BaseClass - # register_for :my_id - # ... - # end - # - # See PluginHost.register. - def register_for *ids - plugin_host.register self, *ids - end - - # The host for this Plugin class. - def plugin_host host = nil - if host and not host.is_a? PluginHost - raise ArgumentError, - "PluginHost expected, but #{host.class} given." - end - self.const_set :PLUGIN_HOST, host if host - self::PLUGIN_HOST - end - - # Require some helper files. - # - # Example: - # - # class MyPlugin < PluginHost::BaseClass - # register_for :my_id - # helper :my_helper - # - # The above example loads the file myplugin/my_helper.rb relative to the - # file in which MyPlugin was defined. - def helper *helpers - for helper in helpers - self::PLUGIN_HOST.require_helper plugin_id, helper.to_s - end - end - - # Returns the pulgin id used by the engine. - def plugin_id - name[/[\w_]+$/].downcase - end + def included mod + warn "#{name} should not be included. Use extend." + end + + # Register this class for the given langs. + # Example: + # class MyPlugin < PluginHost::BaseClass + # register_for :my_id + # ... + # end + # + # See PluginHost.register. + def register_for *ids + plugin_host.register self, *ids + end + + # The host for this Plugin class. + def plugin_host host = nil + if host and not host.is_a? PluginHost + raise ArgumentError, + "PluginHost expected, but #{host.class} given." + end + self.const_set :PLUGIN_HOST, host if host + self::PLUGIN_HOST + end + + # Require some helper files. + # + # Example: + # + # class MyPlugin < PluginHost::BaseClass + # register_for :my_id + # helper :my_helper + # + # The above example loads the file myplugin/my_helper.rb relative to the + # file in which MyPlugin was defined. + def helper *helpers + for helper in helpers + self::PLUGIN_HOST.require_helper plugin_id, helper.to_s + end + end + + # Returns the pulgin id used by the engine. + def plugin_id + name[/[\w_]+$/].downcase + end end @@ -315,12 +315,12 @@ end # The syntax used is: # # require_plugin '/' -# +# # Returns the loaded plugin. def require_plugin path - host_id, plugin_id = path.split '/', 2 - host = PluginHost.host_by_id(host_id) - raise PluginHost::HostNotFound, - "No host for #{host_id.inspect} found." unless host - host.load plugin_id + host_id, plugin_id = path.split '/', 2 + host = PluginHost.host_by_id(host_id) + raise PluginHost::HostNotFound, + "No host for #{host_id.inspect} found." unless host + host.load plugin_id end diff --git a/lib/coderay/helpers/word_list.rb b/lib/coderay/helpers/word_list.rb index 7580a5a..dfbfaf2 100644 --- a/lib/coderay/helpers/word_list.rb +++ b/lib/coderay/helpers/word_list.rb @@ -1,10 +1,10 @@ # = WordList -# +# # Copyright (c) 2006 by murphy (Kornelius Kalnbach) # # License:: LGPL / ask the author # Version:: 1.0 (2006-Feb-3) -# +# # A WordList is a Hash with some additional features. # It is intended to be used for keyword recognition. # @@ -14,11 +14,11 @@ # For case insensitive words use CaseIgnoringWordList. # # Example: -# +# # # define word arrays # RESERVED_WORDS = %w[ # asm break case continue default do else -# ... +# ... # ] # # PREDEFINED_TYPES = %w[ @@ -45,34 +45,33 @@ # # use it # kind = IDENT_KIND[match] # ... -# class WordList < Hash - # Create a WordList for the given +words+. - # - # This WordList responds to [] with +true+, if the word is - # in +words+, and with +false+ otherwise. - def self.for words - new.add words - end + # Create a WordList for the given +words+. + # + # This WordList responds to [] with +true+, if the word is + # in +words+, and with +false+ otherwise. + def self.for words + new.add words + end - # Creates a new WordList with +default+ as default value. - def initialize default = false, &block - super default, &block - end + # Creates a new WordList with +default+ as default value. + def initialize default = false, &block + super default, &block + end - # Checks if a word is included. - def include? word - has_key? word - end + # Checks if a word is included. + def include? word + has_key? word + end - # Add words to the list and associate them with +kind+. - def add words, kind = true - words.each do |word| - self[word] = kind - end - self - end + # Add words to the list and associate them with +kind+. + def add words, kind = true + words.each do |word| + self[word] = kind + end + self + end end @@ -81,27 +80,27 @@ end # keys are compared case-insensitively. class CaseIgnoringWordList < WordList - # Creates a new WordList with +default+ as default value. - # - # Text case is ignored. - def initialize default = false, &block - block ||= proc do |h, k| - h[k] = h.fetch k.downcase, default - end - super default - end + # Creates a new WordList with +default+ as default value. + # + # Text case is ignored. + def initialize default = false, &block + block ||= proc do |h, k| + h[k] = h.fetch k.downcase, default + end + super default + end - # Checks if a word is included. - def include? word - has_key? word.downcase - end + # Checks if a word is included. + def include? word + has_key? word.downcase + end - # Add words to the list and associate them with +kind+. - def add words, kind = true - words.each do |word| - self[word.downcase] = kind - end - self - end + # Add words to the list and associate them with +kind+. + def add words, kind = true + words.each do |word| + self[word.downcase] = kind + end + self + end end diff --git a/lib/coderay/scanner.rb b/lib/coderay/scanner.rb index 55c1485..6d20211 100644 --- a/lib/coderay/scanner.rb +++ b/lib/coderay/scanner.rb @@ -1,198 +1,198 @@ module CodeRay - - require 'coderay/helpers/plugin' - - # = Scanners - # - # $Id$ - # - # This module holds the Scanner class and its subclasses. - # For example, the Ruby scanner is named CodeRay::Scanners::Ruby - # can be found in coderay/scanners/ruby. - # - # Scanner also provides methods and constants for the register - # mechanism and the [] method that returns the Scanner class - # belonging to the given lang. - # - # See PluginHost. - module Scanners - extend PluginHost - plugin_path File.dirname(__FILE__), 'scanners' - - require 'strscan' - - # = Scanner - # - # The base class for all Scanners. - # - # It is a subclass of Ruby's great +StringScanner+, which - # makes it easy to access the scanning methods inside. - # - # It is also +Enumerable+, so you can use it like an Array of - # Tokens: - # - # require 'coderay' - # - # c_scanner = CodeRay::Scanners[:c].new "if (*p == '{') nest++;" - # - # for text, kind in c_scanner - # puts text if kind == :operator - # end - # - # # prints: (*==)++; - # - # OK, this is a very simple example :) - # You can also use +map+, +any?+, +find+ and even +sort_by+, - # if you want. - class Scanner < StringScanner - extend Plugin - plugin_host Scanners - - # Raised if a Scanner fails while scanning - ScanError = Class.new(Exception) - - require 'coderay/helpers/word_list' - - # The default options for all scanner classes. - # - # Define @default_options for subclasses. - DEFAULT_OPTIONS = { :stream => false } - - class << self - - # Returns if the Scanner can be used in streaming mode. - def streamable? - is_a? Streamable - end + + require 'coderay/helpers/plugin' + + # = Scanners + # + # $Id$ + # + # This module holds the Scanner class and its subclasses. + # For example, the Ruby scanner is named CodeRay::Scanners::Ruby + # can be found in coderay/scanners/ruby. + # + # Scanner also provides methods and constants for the register + # mechanism and the [] method that returns the Scanner class + # belonging to the given lang. + # + # See PluginHost. + module Scanners + extend PluginHost + plugin_path File.dirname(__FILE__), 'scanners' + + require 'strscan' + + # = Scanner + # + # The base class for all Scanners. + # + # It is a subclass of Ruby's great +StringScanner+, which + # makes it easy to access the scanning methods inside. + # + # It is also +Enumerable+, so you can use it like an Array of + # Tokens: + # + # require 'coderay' + # + # c_scanner = CodeRay::Scanners[:c].new "if (*p == '{') nest++;" + # + # for text, kind in c_scanner + # puts text if kind == :operator + # end + # + # # prints: (*==)++; + # + # OK, this is a very simple example :) + # You can also use +map+, +any?+, +find+ and even +sort_by+, + # if you want. + class Scanner < StringScanner + extend Plugin + plugin_host Scanners + + # Raised if a Scanner fails while scanning + ScanError = Class.new(Exception) + + require 'coderay/helpers/word_list' + + # The default options for all scanner classes. + # + # Define @default_options for subclasses. + DEFAULT_OPTIONS = { :stream => false } + + class << self + + # Returns if the Scanner can be used in streaming mode. + def streamable? + is_a? Streamable + end def normify code code = code.to_s.to_unix end - end + end =begin ## Excluded for speed reasons; protected seems to make methods slow. -# Save the StringScanner methods from being called. -# This would not be useful for highlighting. - strscan_public_methods = - StringScanner.instance_methods - - StringScanner.ancestors[1].instance_methods - protected(*strscan_public_methods) + # Save the StringScanner methods from being called. + # This would not be useful for highlighting. + strscan_public_methods = + StringScanner.instance_methods - + StringScanner.ancestors[1].instance_methods + protected(*strscan_public_methods) =end - # Create a new Scanner. - # - # * +code+ is the input String and is handled by the superclass - # StringScanner. - # * +options+ is a Hash with Symbols as keys. - # It is merged with the default options of the class (you can - # overwrite default options here.) - # * +block+ is the callback for streamed highlighting. - # - # If you set :stream to +true+ in the options, the Scanner uses a - # TokenStream with the +block+ as callback to handle the tokens. - # - # Else, a Tokens object is used. - def initialize code='', options = {}, &block - @options = self.class::DEFAULT_OPTIONS.merge options - raise "I am only the basic Scanner class. I can't scan "\ - "anything. :( Use my subclasses." if self.class == Scanner - - super Scanner.normify(code) - - @tokens = options[:tokens] - if @options[:stream] - warn "warning in CodeRay::Scanner.new: :stream is set, "\ - "but no block was given" unless block_given? - raise NotStreamableError, self unless kind_of? Streamable - @tokens ||= TokenStream.new(&block) - else - warn "warning in CodeRay::Scanner.new: Block given, "\ - "but :stream is #{@options[:stream]}" if block_given? - @tokens ||= Tokens.new - end - - setup - end - - # More mnemonic accessor name for the input string. - alias code string - - def reset - super - reset_instance - end - - def string= code - code = Scanner.normify(code) - super code - reset_instance - end - - # Scans the code and returns all tokens in a Tokens object. - def tokenize new_string=nil, options = {} - options = @options.merge(options) - self.string = new_string if new_string - @cached_tokens = - if @options[:stream] # :stream must have been set already - reset unless new_string - scan_tokens @tokens, options - @tokens - else - scan_tokens @tokens, options - end - end - - def tokens - @cached_tokens ||= tokenize - end - - # Traverses the tokens. - def each &block - raise ArgumentError, - 'Cannot traverse TokenStream.' if @options[:stream] - tokens.each(&block) - end - include Enumerable - - # The current line position of the scanner. - # - # Beware, this is implemented inefficiently. It should be used - # for debugging only. - def line - string[0..pos].count("\n") + 1 - end - - protected - - # Can be implemented by subclasses to do some initialization - # that has to be done once per instance. - # - # Use reset for initialization that has to be done once per - # scan. - def setup - end - - # This is the central method, and commonly the only one a - # subclass implements. - # - # Subclasses must implement this method; it must return +tokens+ - # and must only use Tokens#<< for storing scanned tokens! - def scan_tokens tokens, options - raise NotImplementedError, - "#{self.class}#scan_tokens not implemented." - end - - def reset_instance - @tokens.clear unless @options[:keep_tokens] - @cached_tokens = nil - end - - # Scanner error with additional status information - def raise_inspect msg, tokens, ambit = 30 - raise ScanError, <<-EOE % [ + # Create a new Scanner. + # + # * +code+ is the input String and is handled by the superclass + # StringScanner. + # * +options+ is a Hash with Symbols as keys. + # It is merged with the default options of the class (you can + # overwrite default options here.) + # * +block+ is the callback for streamed highlighting. + # + # If you set :stream to +true+ in the options, the Scanner uses a + # TokenStream with the +block+ as callback to handle the tokens. + # + # Else, a Tokens object is used. + def initialize code='', options = {}, &block + @options = self.class::DEFAULT_OPTIONS.merge options + raise "I am only the basic Scanner class. I can't scan "\ + "anything. :( Use my subclasses." if self.class == Scanner + + super Scanner.normify(code) + + @tokens = options[:tokens] + if @options[:stream] + warn "warning in CodeRay::Scanner.new: :stream is set, "\ + "but no block was given" unless block_given? + raise NotStreamableError, self unless kind_of? Streamable + @tokens ||= TokenStream.new(&block) + else + warn "warning in CodeRay::Scanner.new: Block given, "\ + "but :stream is #{@options[:stream]}" if block_given? + @tokens ||= Tokens.new + end + + setup + end + + # More mnemonic accessor name for the input string. + alias code string + + def reset + super + reset_instance + end + + def string= code + code = Scanner.normify(code) + super code + reset_instance + end + + # Scans the code and returns all tokens in a Tokens object. + def tokenize new_string=nil, options = {} + options = @options.merge(options) + self.string = new_string if new_string + @cached_tokens = + if @options[:stream] # :stream must have been set already + reset unless new_string + scan_tokens @tokens, options + @tokens + else + scan_tokens @tokens, options + end + end + + def tokens + @cached_tokens ||= tokenize + end + + # Traverses the tokens. + def each &block + raise ArgumentError, + 'Cannot traverse TokenStream.' if @options[:stream] + tokens.each(&block) + end + include Enumerable + + # The current line position of the scanner. + # + # Beware, this is implemented inefficiently. It should be used + # for debugging only. + def line + string[0..pos].count("\n") + 1 + end + + protected + + # Can be implemented by subclasses to do some initialization + # that has to be done once per instance. + # + # Use reset for initialization that has to be done once per + # scan. + def setup + end + + # This is the central method, and commonly the only one a + # subclass implements. + # + # Subclasses must implement this method; it must return +tokens+ + # and must only use Tokens#<< for storing scanned tokens! + def scan_tokens tokens, options + raise NotImplementedError, + "#{self.class}#scan_tokens not implemented." + end + + def reset_instance + @tokens.clear unless @options[:keep_tokens] + @cached_tokens = nil + end + + # Scanner error with additional status information + def raise_inspect msg, tokens, ambit = 30 + raise ScanError, <<-EOE % [ ***ERROR in %s: %s @@ -210,29 +210,29 @@ surrounding code: ***ERROR*** - EOE - File.basename(caller[0]), - msg, - tokens.last(10).map { |t| t.inspect }.join("\n"), - line, pos, - matched, bol?, eos?, - string[pos-ambit,ambit], - string[pos,ambit], - ] - end + EOE + File.basename(caller[0]), + msg, + tokens.last(10).map { |t| t.inspect }.join("\n"), + line, pos, + matched, bol?, eos?, + string[pos-ambit,ambit], + string[pos,ambit], + ] + end - end + end - end + end end class String - # I love this hack. It seems to silence all dos/unix/mac newline problems. - def to_unix - if index ?\r - gsub(/\r\n?/, "\n") - else - self - end - end + # I love this hack. It seems to silence all dos/unix/mac newline problems. + def to_unix + if index ?\r + gsub(/\r\n?/, "\n") + else + self + end + end end diff --git a/lib/coderay/scanners/_map.rb b/lib/coderay/scanners/_map.rb index 1482ee9..6268a6c 100644 --- a/lib/coderay/scanners/_map.rb +++ b/lib/coderay/scanners/_map.rb @@ -1,14 +1,14 @@ module CodeRay module Scanners - - map :cpp => :c, - :plain => :plaintext, - :pascal => :delphi, - :irb => :ruby, - :xml => :html, - :xhtml => :nitro_html - default :plain - + map :cpp => :c, + :plain => :plaintext, + :pascal => :delphi, + :irb => :ruby, + :xml => :html, + :xhtml => :nitro_html + + default :plain + end end diff --git a/lib/coderay/scanners/c.rb b/lib/coderay/scanners/c.rb index effaaa1..66b8de1 100644 --- a/lib/coderay/scanners/c.rb +++ b/lib/coderay/scanners/c.rb @@ -1,155 +1,155 @@ module CodeRay module Scanners - - class C < Scanner - - register_for :c - - RESERVED_WORDS = [ - 'asm', 'break', 'case', 'continue', 'default', 'do', 'else', - 'for', 'goto', 'if', 'return', 'switch', 'while', - 'struct', 'union', 'enum', 'typedef', - 'static', 'register', 'auto', 'extern', - 'sizeof', - 'volatile', 'const', # C89 - 'inline', 'restrict', # C99 - ] - - PREDEFINED_TYPES = [ - 'int', 'long', 'short', 'char', 'void', - 'signed', 'unsigned', 'float', 'double', - 'bool', 'complex', # C99 - ] - - PREDEFINED_CONSTANTS = [ - 'EOF', 'NULL', - 'true', 'false', # C99 - ] - - IDENT_KIND = WordList.new(:ident). - add(RESERVED_WORDS, :reserved). - add(PREDEFINED_TYPES, :pre_type). - add(PREDEFINED_CONSTANTS, :pre_constant) - - ESCAPE = / [rbfnrtv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x - UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x - - def scan_tokens tokens, options - - state = :initial - - until eos? - - kind = :error - match = nil - - case state - - when :initial - - if scan(/ \s+ | \\\n /x) - kind = :space - - elsif scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx) - kind = :comment - - elsif match = scan(/ \# \s* if \s* 0 /x) - match << scan_until(/ ^\# (?:elif|else|endif) .*? $ | \z /xm) unless eos? - kind = :comment - - elsif scan(/ [-+*\/=<>?:;,!&^|()\[\]{}~%]+ | \.(?!\d) /x) - kind = :operator - - elsif match = scan(/ [A-Za-z_][A-Za-z_0-9]* /x) - kind = IDENT_KIND[match] - if kind == :ident and check(/:(?!:)/) - match << scan(/:/) - kind = :label - end - - elsif match = scan(/L?"/) - tokens << [:open, :string] - if match[0] == ?L - tokens << ['L', :modifier] - match = '"' - end - state = :string - kind = :delimiter - - elsif scan(/#\s*(\w*)/) - kind = :preprocessor # FIXME multiline preprocs - state = :include_expected if self[1] == 'include' - - elsif scan(/ L?' (?: [^\'\n\\] | \\ #{ESCAPE} )? '? /ox) - kind = :char - - elsif scan(/0[xX][0-9A-Fa-f]+/) - kind = :hex - - elsif scan(/(?:0[0-7]+)(?![89.eEfF])/) - kind = :oct - - elsif scan(/(?:\d+)(?![.eEfF])/) - kind = :integer - - elsif scan(/\d[fF]?|\d*\.\d+(?:[eE][+-]?\d+)?[fF]?|\d+[eE][+-]?\d+[fF]?/) - kind = :float - - else - getch - end - - when :string - if scan(/[^\\"]+/) - kind = :content - elsif scan(/"/) - tokens << ['"', :delimiter] - tokens << [:close, :string] - state = :initial - next - elsif scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox) - kind = :char - elsif scan(/ \\ | $ /x) - kind = :error - state = :initial - else - raise_inspect "else case \" reached; %p not handled." % peek(1), tokens - end - - when :include_expected - if scan(/<[^>\n]+>?|"[^"\n\\]*(?:\\.[^"\n\\]*)*"?/) - kind = :include - state = :initial - - elsif match = scan(/\s+/) - kind = :space - state = :initial if match.index ?\n - - else - getch - - end - - else - raise_inspect 'Unknown state', tokens - - end - - match ||= matched - if $DEBUG and (not kind or kind == :error) - raise_inspect 'Error token %p in line %d' % - [[match, kind], line], tokens - end - raise_inspect 'Empty token', tokens unless match - - tokens << [match, kind] - - end - - tokens - end - - end + + class C < Scanner + + register_for :c + + RESERVED_WORDS = [ + 'asm', 'break', 'case', 'continue', 'default', 'do', 'else', + 'for', 'goto', 'if', 'return', 'switch', 'while', + 'struct', 'union', 'enum', 'typedef', + 'static', 'register', 'auto', 'extern', + 'sizeof', + 'volatile', 'const', # C89 + 'inline', 'restrict', # C99 + ] + + PREDEFINED_TYPES = [ + 'int', 'long', 'short', 'char', 'void', + 'signed', 'unsigned', 'float', 'double', + 'bool', 'complex', # C99 + ] + + PREDEFINED_CONSTANTS = [ + 'EOF', 'NULL', + 'true', 'false', # C99 + ] + + IDENT_KIND = WordList.new(:ident). + add(RESERVED_WORDS, :reserved). + add(PREDEFINED_TYPES, :pre_type). + add(PREDEFINED_CONSTANTS, :pre_constant) + + ESCAPE = / [rbfnrtv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x + UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x + + def scan_tokens tokens, options + + state = :initial + + until eos? + + kind = :error + match = nil + + case state + + when :initial + + if scan(/ \s+ | \\\n /x) + kind = :space + + elsif scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx) + kind = :comment + + elsif match = scan(/ \# \s* if \s* 0 /x) + match << scan_until(/ ^\# (?:elif|else|endif) .*? $ | \z /xm) unless eos? + kind = :comment + + elsif scan(/ [-+*\/=<>?:;,!&^|()\[\]{}~%]+ | \.(?!\d) /x) + kind = :operator + + elsif match = scan(/ [A-Za-z_][A-Za-z_0-9]* /x) + kind = IDENT_KIND[match] + if kind == :ident and check(/:(?!:)/) + match << scan(/:/) + kind = :label + end + + elsif match = scan(/L?"/) + tokens << [:open, :string] + if match[0] == ?L + tokens << ['L', :modifier] + match = '"' + end + state = :string + kind = :delimiter + + elsif scan(/#\s*(\w*)/) + kind = :preprocessor # FIXME multiline preprocs + state = :include_expected if self[1] == 'include' + + elsif scan(/ L?' (?: [^\'\n\\] | \\ #{ESCAPE} )? '? /ox) + kind = :char + + elsif scan(/0[xX][0-9A-Fa-f]+/) + kind = :hex + + elsif scan(/(?:0[0-7]+)(?![89.eEfF])/) + kind = :oct + + elsif scan(/(?:\d+)(?![.eEfF])/) + kind = :integer + + elsif scan(/\d[fF]?|\d*\.\d+(?:[eE][+-]?\d+)?[fF]?|\d+[eE][+-]?\d+[fF]?/) + kind = :float + + else + getch + end + + when :string + if scan(/[^\\"]+/) + kind = :content + elsif scan(/"/) + tokens << ['"', :delimiter] + tokens << [:close, :string] + state = :initial + next + elsif scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox) + kind = :char + elsif scan(/ \\ | $ /x) + kind = :error + state = :initial + else + raise_inspect "else case \" reached; %p not handled." % peek(1), tokens + end + + when :include_expected + if scan(/<[^>\n]+>?|"[^"\n\\]*(?:\\.[^"\n\\]*)*"?/) + kind = :include + state = :initial + + elsif match = scan(/\s+/) + kind = :space + state = :initial if match.index ?\n + + else + getch + + end + + else + raise_inspect 'Unknown state', tokens + + end + + match ||= matched + if $DEBUG and (not kind or kind == :error) + raise_inspect 'Error token %p in line %d' % + [[match, kind], line], tokens + end + raise_inspect 'Empty token', tokens unless match + + tokens << [match, kind] + + end + + tokens + end + + end end end diff --git a/lib/coderay/scanners/delphi.rb b/lib/coderay/scanners/delphi.rb index c92fab5..d9d9e1d 100644 --- a/lib/coderay/scanners/delphi.rb +++ b/lib/coderay/scanners/delphi.rb @@ -1,129 +1,129 @@ module CodeRay module Scanners - - class Delphi < Scanner + + class Delphi < Scanner - register_for :delphi - - RESERVED_WORDS = [ - 'and', 'array', 'as', 'at', 'asm', 'at', 'begin', 'case', 'class', - 'const', 'constructor', 'destructor', 'dispinterface', 'div', 'do', - 'downto', 'else', 'end', 'except', 'exports', 'file', 'finalization', - 'finally', 'for', 'function', 'goto', 'if', 'implementation', 'in', - 'inherited', 'initialization', 'inline', 'interface', 'is', 'label', - 'library', 'mod', 'nil', 'not', 'object', 'of', 'or', 'out', 'packed', - 'procedure', 'program', 'property', 'raise', 'record', 'repeat', - 'resourcestring', 'set', 'shl', 'shr', 'string', 'then', 'threadvar', - 'to', 'try', 'type', 'unit', 'until', 'uses', 'var', 'while', 'with', - 'xor', 'on' - ] + register_for :delphi + + RESERVED_WORDS = [ + 'and', 'array', 'as', 'at', 'asm', 'at', 'begin', 'case', 'class', + 'const', 'constructor', 'destructor', 'dispinterface', 'div', 'do', + 'downto', 'else', 'end', 'except', 'exports', 'file', 'finalization', + 'finally', 'for', 'function', 'goto', 'if', 'implementation', 'in', + 'inherited', 'initialization', 'inline', 'interface', 'is', 'label', + 'library', 'mod', 'nil', 'not', 'object', 'of', 'or', 'out', 'packed', + 'procedure', 'program', 'property', 'raise', 'record', 'repeat', + 'resourcestring', 'set', 'shl', 'shr', 'string', 'then', 'threadvar', + 'to', 'try', 'type', 'unit', 'until', 'uses', 'var', 'while', 'with', + 'xor', 'on' + ] - DIRECTIVES = [ - 'absolute', 'abstract', 'assembler', 'at', 'automated', 'cdecl', - 'contains', 'deprecated', 'dispid', 'dynamic', 'export', - 'external', 'far', 'forward', 'implements', 'local', - 'near', 'nodefault', 'on', 'overload', 'override', - 'package', 'pascal', 'platform', 'private', 'protected', 'public', - 'published', 'read', 'readonly', 'register', 'reintroduce', - 'requires', 'resident', 'safecall', 'stdcall', 'stored', 'varargs', - 'virtual', 'write', 'writeonly' - ] + DIRECTIVES = [ + 'absolute', 'abstract', 'assembler', 'at', 'automated', 'cdecl', + 'contains', 'deprecated', 'dispid', 'dynamic', 'export', + 'external', 'far', 'forward', 'implements', 'local', + 'near', 'nodefault', 'on', 'overload', 'override', + 'package', 'pascal', 'platform', 'private', 'protected', 'public', + 'published', 'read', 'readonly', 'register', 'reintroduce', + 'requires', 'resident', 'safecall', 'stdcall', 'stored', 'varargs', + 'virtual', 'write', 'writeonly' + ] - IDENT_KIND = CaseIgnoringWordList.new(:ident). - add(RESERVED_WORDS, :reserved). - add(DIRECTIVES, :directive) + IDENT_KIND = CaseIgnoringWordList.new(:ident). + add(RESERVED_WORDS, :reserved). + add(DIRECTIVES, :directive) - def scan_tokens tokens, options + def scan_tokens tokens, options - state = :initial + state = :initial - until eos? + until eos? - kind = :error - match = nil + kind = :error + match = nil - if state == :initial - - if scan(/ \s+ /x) - kind = :space - - elsif scan(%r! \{ \$ [^}]* \}? | \(\* \$ (?: .*? \*\) | .* ) !mx) - kind = :preprocessor - - elsif scan(%r! // [^\n]* | \{ [^}]* \}? | \(\* (?: .*? \*\) | .* ) !mx) - kind = :comment - - elsif scan(/ [-+*\/=<>:;,.@\^|\(\)\[\]]+ /x) - kind = :operator - - elsif match = scan(/ [A-Za-z_][A-Za-z_0-9]* /x) - kind = IDENT_KIND[match] - - elsif match = scan(/ ' ( [^\n']|'' ) (?:'|$) /x) - tokens << [:open, :char] - tokens << ["'", :delimiter] - tokens << [self[1], :content] - tokens << ["'", :delimiter] - tokens << [:close, :char] - next - - elsif match = scan(/ ' /x) - tokens << [:open, :string] - state = :string - kind = :delimiter - - elsif scan(/ \# (?: \d+ | \$[0-9A-Fa-f]+ ) /x) - kind = :char - - elsif scan(/ \$ [0-9A-Fa-f]+ /x) - kind = :hex - - elsif scan(/ (?: \d+ ) (?![eE]|\.[^.]) /x) - kind = :integer - - elsif scan(/ \d+ (?: \.\d+ (?: [eE][+-]? \d+ )? | [eE][+-]? \d+ ) /x) - kind = :float + if state == :initial + + if scan(/ \s+ /x) + kind = :space + + elsif scan(%r! \{ \$ [^}]* \}? | \(\* \$ (?: .*? \*\) | .* ) !mx) + kind = :preprocessor + + elsif scan(%r! // [^\n]* | \{ [^}]* \}? | \(\* (?: .*? \*\) | .* ) !mx) + kind = :comment + + elsif scan(/ [-+*\/=<>:;,.@\^|\(\)\[\]]+ /x) + kind = :operator + + elsif match = scan(/ [A-Za-z_][A-Za-z_0-9]* /x) + kind = IDENT_KIND[match] + + elsif match = scan(/ ' ( [^\n']|'' ) (?:'|$) /x) + tokens << [:open, :char] + tokens << ["'", :delimiter] + tokens << [self[1], :content] + tokens << ["'", :delimiter] + tokens << [:close, :char] + next + + elsif match = scan(/ ' /x) + tokens << [:open, :string] + state = :string + kind = :delimiter + + elsif scan(/ \# (?: \d+ | \$[0-9A-Fa-f]+ ) /x) + kind = :char + + elsif scan(/ \$ [0-9A-Fa-f]+ /x) + kind = :hex + + elsif scan(/ (?: \d+ ) (?![eE]|\.[^.]) /x) + kind = :integer + + elsif scan(/ \d+ (?: \.\d+ (?: [eE][+-]? \d+ )? | [eE][+-]? \d+ ) /x) + kind = :float - else - getch - end - - elsif state == :string - if scan(/[^\n']+/) - kind = :content - elsif scan(/''/) - kind = :char - elsif scan(/'/) - tokens << ["'", :delimiter] - tokens << [:close, :string] - state = :initial - next - elsif scan(/\n/) - state = :initial - else - raise "else case \' reached; %p not handled." % peek(1), tokens - end - - else - raise 'else-case reached', tokens - - end - - match ||= matched - if $DEBUG and (not kind or kind == :error) - raise_inspect 'Error token %p in line %d' % - [[match, kind], line], tokens - end - raise_inspect 'Empty token', tokens unless match + else + getch + end + + elsif state == :string + if scan(/[^\n']+/) + kind = :content + elsif scan(/''/) + kind = :char + elsif scan(/'/) + tokens << ["'", :delimiter] + tokens << [:close, :string] + state = :initial + next + elsif scan(/\n/) + state = :initial + else + raise "else case \' reached; %p not handled." % peek(1), tokens + end + + else + raise 'else-case reached', tokens + + end + + match ||= matched + if $DEBUG and (not kind or kind == :error) + raise_inspect 'Error token %p in line %d' % + [[match, kind], line], tokens + end + raise_inspect 'Empty token', tokens unless match - tokens << [match, kind] - - end - - tokens - end + tokens << [match, kind] + + end + + tokens + end - end + end end end diff --git a/lib/coderay/scanners/html.rb b/lib/coderay/scanners/html.rb index cbf8a55..7cdc07e 100644 --- a/lib/coderay/scanners/html.rb +++ b/lib/coderay/scanners/html.rb @@ -1,167 +1,167 @@ module CodeRay module Scanners - # HTML Scanner - # - # $Id$ - class HTML < Scanner - - include Streamable - register_for :html - - ATTR_NAME = /[\w.:-]+/ - ATTR_VALUE_UNQUOTED = ATTR_NAME - TAG_END = /\/?>/ - HEX = /[0-9a-fA-F]/ - ENTITY = / - & - (?: - \w+ - | - \# - (?: - \d+ - | - x#{HEX}+ - ) - ) - ; - /ox - - PLAIN_STRING_CONTENT = { - "'" => /[^&'>\n]+/, - '"' => /[^&">\n]+/, - } - - private - def setup - @state = :initial - @plain_string_content = nil - end - - def scan_tokens tokens, options - - state = @state - plain_string_content = @plain_string_content - - until eos? - - kind = :error - match = nil - - if scan(/\s+/m) - kind = :space - - else - - case state - - when :initial - if scan(//m) - kind = :comment - elsif scan(//m) - kind = :preprocessor - elsif scan(/<\?xml.*?\?>/m) - kind = :preprocessor - elsif scan(/<\?.*?\?>|<%.*?%>/m) - kind = :comment - elsif scan(/<\/[-\w_.:]*>/m) - kind = :tag - elsif match = scan(/<[-\w_.:]*>?/m) - kind = :tag - state = :attribute unless match[-1] == ?> - elsif scan(/[^<>&]+/) - kind = :plain - elsif scan(/#{ENTITY}/ox) - kind = :entity - elsif scan(/[>&]/) - kind = :error - else - raise_inspect '[BUG] else-case reached with state %p' % [state], tokens - end - - when :attribute - if scan(/#{TAG_END}/) - kind = :tag - state = :initial - elsif scan(/#{ATTR_NAME}/o) - kind = :attribute_name - state = :attribute_equal - else - getch - end - - when :attribute_equal - if scan(/=/) - kind = :operator - state = :attribute_value - elsif scan(/#{ATTR_NAME}/o) - kind = :attribute_name - elsif scan(/#{TAG_END}/o) - kind = :tag - state = :initial - elsif scan(/./) - state = :attribute - end - - when :attribute_value - if scan(/#{ATTR_VALUE_UNQUOTED}/o) - kind = :attribute_value - state = :attribute - elsif match = scan(/["']/) - tokens << [:open, :string] - state = :attribute_value_string - plain_string_content = PLAIN_STRING_CONTENT[match] - kind = :delimiter - elsif scan(/#{TAG_END}/o) - kind = :tag - state = :initial - else - getch - end - - when :attribute_value_string - if scan(plain_string_content) - kind = :content - elsif scan(/['"]/) - tokens << [matched, :delimiter] - tokens << [:close, :string] - state = :attribute - next - elsif scan(/#{ENTITY}/ox) - kind = :entity - elsif scan(/[\n>]/) - tokens << [:close, :string] - kind = :error - state = :initial - end - - else - raise_inspect 'Unknown state: %p' % [state], tokens - - end - - end - - match ||= matched - if $DEBUG and (not kind or kind == :error) - raise_inspect 'Error token %p in line %d' % - [[match, kind], line], tokens - end - raise_inspect 'Empty token', tokens unless match - - tokens << [match, kind] - end - - if options[:keep_state] - @state = state - @plain_string_content = plain_string_content - end - - tokens - end - - end + # HTML Scanner + # + # $Id$ + class HTML < Scanner + + include Streamable + register_for :html + + ATTR_NAME = /[\w.:-]+/ + ATTR_VALUE_UNQUOTED = ATTR_NAME + TAG_END = /\/?>/ + HEX = /[0-9a-fA-F]/ + ENTITY = / + & + (?: + \w+ + | + \# + (?: + \d+ + | + x#{HEX}+ + ) + ) + ; + /ox + + PLAIN_STRING_CONTENT = { + "'" => /[^&'>\n]+/, + '"' => /[^&">\n]+/, + } + + private + def setup + @state = :initial + @plain_string_content = nil + end + + def scan_tokens tokens, options + + state = @state + plain_string_content = @plain_string_content + + until eos? + + kind = :error + match = nil + + if scan(/\s+/m) + kind = :space + + else + + case state + + when :initial + if scan(//m) + kind = :comment + elsif scan(//m) + kind = :preprocessor + elsif scan(/<\?xml.*?\?>/m) + kind = :preprocessor + elsif scan(/<\?.*?\?>|<%.*?%>/m) + kind = :comment + elsif scan(/<\/[-\w_.:]*>/m) + kind = :tag + elsif match = scan(/<[-\w_.:]*>?/m) + kind = :tag + state = :attribute unless match[-1] == ?> + elsif scan(/[^<>&]+/) + kind = :plain + elsif scan(/#{ENTITY}/ox) + kind = :entity + elsif scan(/[>&]/) + kind = :error + else + raise_inspect '[BUG] else-case reached with state %p' % [state], tokens + end + + when :attribute + if scan(/#{TAG_END}/) + kind = :tag + state = :initial + elsif scan(/#{ATTR_NAME}/o) + kind = :attribute_name + state = :attribute_equal + else + getch + end + + when :attribute_equal + if scan(/=/) + kind = :operator + state = :attribute_value + elsif scan(/#{ATTR_NAME}/o) + kind = :attribute_name + elsif scan(/#{TAG_END}/o) + kind = :tag + state = :initial + elsif scan(/./) + state = :attribute + end + + when :attribute_value + if scan(/#{ATTR_VALUE_UNQUOTED}/o) + kind = :attribute_value + state = :attribute + elsif match = scan(/["']/) + tokens << [:open, :string] + state = :attribute_value_string + plain_string_content = PLAIN_STRING_CONTENT[match] + kind = :delimiter + elsif scan(/#{TAG_END}/o) + kind = :tag + state = :initial + else + getch + end + + when :attribute_value_string + if scan(plain_string_content) + kind = :content + elsif scan(/['"]/) + tokens << [matched, :delimiter] + tokens << [:close, :string] + state = :attribute + next + elsif scan(/#{ENTITY}/ox) + kind = :entity + elsif scan(/[\n>]/) + tokens << [:close, :string] + kind = :error + state = :initial + end + + else + raise_inspect 'Unknown state: %p' % [state], tokens + + end + + end + + match ||= matched + if $DEBUG and (not kind or kind == :error) + raise_inspect 'Error token %p in line %d' % + [[match, kind], line], tokens + end + raise_inspect 'Empty token', tokens unless match + + tokens << [match, kind] + end + + if options[:keep_state] + @state = state + @plain_string_content = plain_string_content + end + + tokens + end + + end end end diff --git a/lib/coderay/scanners/nitro_html.rb b/lib/coderay/scanners/nitro_html.rb index 119924b..5955195 100644 --- a/lib/coderay/scanners/nitro_html.rb +++ b/lib/coderay/scanners/nitro_html.rb @@ -1,125 +1,125 @@ module CodeRay module Scanners - load :html - load :ruby - - # RHTML Scanner - # - # $Id$ - class NitroHTML < Scanner - - include Streamable - register_for :nitro_html - - NITRO_RUBY_BLOCK = / - <\?r - (?> - [^\?]* - (?> \?(?!>) [^\?]* )* - ) - (?: \?> )? - | - - (?> - [^<]* - (?> <(?!\/ruby>) [^<]* )* - ) - (?: <\/ruby> )? - | - <% - (?> - [^%]* - (?> %(?!>) [^%]* )* - ) - (?: %> )? - /mx - - NITRO_VALUE_BLOCK = / - \# - (?: - \{ - [^{}]* - (?> - \{ [^}]* \} - (?> [^{}]* ) - )* - \}? - | \| [^|]* \|? - | \( [^)]* \)? - | \[ [^\]]* \]? - | \\ [^\\]* \\? - ) - /x - - NITRO_ENTITY = / - % (?: \#\d+ | \w+ ) ; - / - - START_OF_RUBY = / - (?=[<\#%]) - < (?: \?r | % | ruby> ) - | \# [{(|] - | % (?: \#\d+ | \w+ ) ; - /x - - CLOSING_PAREN = Hash.new do |h, p| - h[p] = p - end.update( { - '(' => ')', - '[' => ']', - '{' => '}', - } ) - - private - - def setup - @ruby_scanner = CodeRay.scanner :ruby, :tokens => @tokens, :keep_tokens => true - @html_scanner = CodeRay.scanner :html, :tokens => @tokens, :keep_tokens => true, :keep_state => true - end - - def scan_tokens tokens, options - - until eos? - - if (match = scan_until(/(?=#{START_OF_RUBY})/o) || scan_until(/\z/)) and not match.empty? - @html_scanner.tokenize match - - elsif match = scan(/#{NITRO_VALUE_BLOCK}/o) - start_tag = match[0,2] - delimiter = CLOSING_PAREN[start_tag[1,1]] - end_tag = match[-1,1] == delimiter ? delimiter : '' - tokens << [:open, :inline] - tokens << [start_tag, :delimiter] - code = match[start_tag.size .. -1 - end_tag.size] - @ruby_scanner.tokenize code - tokens << [end_tag, :delimiter] unless end_tag.empty? - tokens << [:close, :inline] - - elsif match = scan(/#{NITRO_RUBY_BLOCK}/o) - start_tag = '' ? '?>' : '' - tokens << [:open, :inline] - tokens << [start_tag, :delimiter] - code = match[start_tag.size .. -(end_tag.size)-1] - @ruby_scanner.tokenize code - tokens << [end_tag, :delimiter] unless end_tag.empty? - tokens << [:close, :inline] - - elsif entity = scan(/#{NITRO_ENTITY}/o) - tokens << [entity, :entity] - - else - raise_inspect 'else-case reached!', tokens - end - - end - - tokens - - end - - end + load :html + load :ruby + + # RHTML Scanner + # + # $Id$ + class NitroHTML < Scanner + + include Streamable + register_for :nitro_html + + NITRO_RUBY_BLOCK = / + <\?r + (?> + [^\?]* + (?> \?(?!>) [^\?]* )* + ) + (?: \?> )? + | + + (?> + [^<]* + (?> <(?!\/ruby>) [^<]* )* + ) + (?: <\/ruby> )? + | + <% + (?> + [^%]* + (?> %(?!>) [^%]* )* + ) + (?: %> )? + /mx + + NITRO_VALUE_BLOCK = / + \# + (?: + \{ + [^{}]* + (?> + \{ [^}]* \} + (?> [^{}]* ) + )* + \}? + | \| [^|]* \|? + | \( [^)]* \)? + | \[ [^\]]* \]? + | \\ [^\\]* \\? + ) + /x + + NITRO_ENTITY = / + % (?: \#\d+ | \w+ ) ; + / + + START_OF_RUBY = / + (?=[<\#%]) + < (?: \?r | % | ruby> ) + | \# [{(|] + | % (?: \#\d+ | \w+ ) ; + /x + + CLOSING_PAREN = Hash.new do |h, p| + h[p] = p + end.update( { + '(' => ')', + '[' => ']', + '{' => '}', + } ) + + private + + def setup + @ruby_scanner = CodeRay.scanner :ruby, :tokens => @tokens, :keep_tokens => true + @html_scanner = CodeRay.scanner :html, :tokens => @tokens, :keep_tokens => true, :keep_state => true + end + + def scan_tokens tokens, options + + until eos? + + if (match = scan_until(/(?=#{START_OF_RUBY})/o) || scan_until(/\z/)) and not match.empty? + @html_scanner.tokenize match + + elsif match = scan(/#{NITRO_VALUE_BLOCK}/o) + start_tag = match[0,2] + delimiter = CLOSING_PAREN[start_tag[1,1]] + end_tag = match[-1,1] == delimiter ? delimiter : '' + tokens << [:open, :inline] + tokens << [start_tag, :delimiter] + code = match[start_tag.size .. -1 - end_tag.size] + @ruby_scanner.tokenize code + tokens << [end_tag, :delimiter] unless end_tag.empty? + tokens << [:close, :inline] + + elsif match = scan(/#{NITRO_RUBY_BLOCK}/o) + start_tag = '' ? '?>' : '' + tokens << [:open, :inline] + tokens << [start_tag, :delimiter] + code = match[start_tag.size .. -(end_tag.size)-1] + @ruby_scanner.tokenize code + tokens << [end_tag, :delimiter] unless end_tag.empty? + tokens << [:close, :inline] + + elsif entity = scan(/#{NITRO_ENTITY}/o) + tokens << [entity, :entity] + + else + raise_inspect 'else-case reached!', tokens + end + + end + + tokens + + end + + end end end diff --git a/lib/coderay/scanners/plaintext.rb b/lib/coderay/scanners/plaintext.rb index 3824ee9..9007646 100644 --- a/lib/coderay/scanners/plaintext.rb +++ b/lib/coderay/scanners/plaintext.rb @@ -1,15 +1,15 @@ module CodeRay module Scanners - class Plaintext < Scanner - - register_for :plaintext, :plain + class Plaintext < Scanner - def scan_tokens tokens, options - tokens << [scan_until(/\z/), :plain] - end + register_for :plaintext, :plain - end + def scan_tokens tokens, options + tokens << [scan_until(/\z/), :plain] + end + + end end end diff --git a/lib/coderay/scanners/rhtml.rb b/lib/coderay/scanners/rhtml.rb index 3b1817e..15a7566 100644 --- a/lib/coderay/scanners/rhtml.rb +++ b/lib/coderay/scanners/rhtml.rb @@ -1,65 +1,65 @@ module CodeRay module Scanners - load :html - load :ruby - - # RHTML Scanner - # - # $Id$ - class RHTML < Scanner - - include Streamable - register_for :rhtml - - ERB_RUBY_BLOCK = / - <%(?!%)[=-]? - (?> - [^%]* - (?> %(?!>) [^%]* )* - ) - (?: %> )? - /x - - START_OF_ERB = / - <%(?!%) - /x - - private - - def setup - @ruby_scanner = CodeRay.scanner :ruby, :tokens => @tokens, :keep_tokens => true - @html_scanner = CodeRay.scanner :html, :tokens => @tokens, :keep_tokens => true, :keep_state => true - end - - def scan_tokens tokens, options - - until eos? - - if (match = scan_until(/(?=#{START_OF_ERB})/o) || scan_until(/\z/)) and not match.empty? - @html_scanner.tokenize match - - elsif match = scan(/#{ERB_RUBY_BLOCK}/o) - start_tag = match[/\A<%[-=]?/] - end_tag = match[/%?>?\z/] - tokens << [:open, :inline] - tokens << [start_tag, :delimiter] - code = match[start_tag.size .. -1 - end_tag.size] - @ruby_scanner.tokenize code - tokens << [end_tag, :delimiter] unless end_tag.empty? - tokens << [:close, :inline] - - else - raise_inspect 'else-case reached!', tokens - end - - end - - tokens - - end - - end + load :html + load :ruby + + # RHTML Scanner + # + # $Id$ + class RHTML < Scanner + + include Streamable + register_for :rhtml + + ERB_RUBY_BLOCK = / + <%(?!%)[=-]? + (?> + [^%]* + (?> %(?!>) [^%]* )* + ) + (?: %> )? + /x + + START_OF_ERB = / + <%(?!%) + /x + + private + + def setup + @ruby_scanner = CodeRay.scanner :ruby, :tokens => @tokens, :keep_tokens => true + @html_scanner = CodeRay.scanner :html, :tokens => @tokens, :keep_tokens => true, :keep_state => true + end + + def scan_tokens tokens, options + + until eos? + + if (match = scan_until(/(?=#{START_OF_ERB})/o) || scan_until(/\z/)) and not match.empty? + @html_scanner.tokenize match + + elsif match = scan(/#{ERB_RUBY_BLOCK}/o) + start_tag = match[/\A<%[-=]?/] + end_tag = match[/%?>?\z/] + tokens << [:open, :inline] + tokens << [start_tag, :delimiter] + code = match[start_tag.size .. -1 - end_tag.size] + @ruby_scanner.tokenize code + tokens << [end_tag, :delimiter] unless end_tag.empty? + tokens << [:close, :inline] + + else + raise_inspect 'else-case reached!', tokens + end + + end + + tokens + + end + + end end end diff --git a/lib/coderay/scanners/ruby.rb b/lib/coderay/scanners/ruby.rb index 7ba3029..3ce5003 100644 --- a/lib/coderay/scanners/ruby.rb +++ b/lib/coderay/scanners/ruby.rb @@ -157,7 +157,7 @@ module Scanners next # }}} else -# {{{ +# {{{ if match = scan(/ [ \t\f]+ | \\? \n | \# .* /x) or ( bol? and match = scan(/#{patterns::RUBYDOC_OR_DATA}/o) ) fancy_allowed = true @@ -190,7 +190,7 @@ module Scanners next elsif state == :initial - + # IDENTS # if match = scan(/#{patterns::METHOD_NAME}/o) if last_token_dot @@ -205,7 +205,7 @@ module Scanners end ## experimental! fancy_allowed = regexp_allowed = :set if patterns::REGEXP_ALLOWED[match] or check(/\s+(?:%\S|\/\S)/) - + # OPERATORS # elsif (not last_token_dot and match = scan(/ ==?=? | \.\.?\.? | [\(\)\[\]\{\}] | :: | , /x)) or (last_token_dot and match = scan(/#{patterns::METHOD_NAME_OPERATOR}/o)) @@ -228,12 +228,12 @@ module Scanners end end end - + elsif match = scan(/ ['"] /mx) tokens << [:open, :string] type = :delimiter state = patterns::StringState.new :string, match == '"', match # important for streaming - + elsif match = scan(/#{patterns::INSTANCE_VARIABLE}/o) type = :instance_variable @@ -246,7 +246,7 @@ module Scanners tokens = [] saved_tokens = tokens end - + elsif match = scan(/#{patterns::NUMERIC}/o) type = if self[1] then :float else :integer end @@ -261,11 +261,11 @@ module Scanners else type = :symbol end - + elsif match = scan(/ [-+!~^]=? | [*|&]{1,2}=? | >>? /x) regexp_allowed = fancy_allowed = :set type = :operator - + elsif fancy_allowed and match = scan(/#{patterns::HEREDOC_OPEN}/o) indented = self[1] == '-' quote = self[3] @@ -277,7 +277,7 @@ module Scanners heredoc = patterns::StringState.new type, quote != '\'', delim, (indented ? :indented : :linestart ) heredocs ||= [] # create heredocs if empty heredocs << heredoc - + elsif fancy_allowed and match = scan(/#{patterns::FANCY_START_SAVE}/o) type, interpreted = *patterns::FancyStringType.fetch(self[1]) do raise_inspect 'Unknown fancy string: %%%p' % k, tokens @@ -301,18 +301,18 @@ module Scanners type = :delimiter state = patterns::StringState.new :shell, true, match end - + elsif match = scan(/#{patterns::GLOBAL_VARIABLE}/o) type = :global_variable - + elsif match = scan(/#{patterns::CLASS_VARIABLE}/o) type = :class_variable - + else match = getch - + end - + elsif state == :def_expected state = :initial if match = scan(/(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/o) @@ -341,7 +341,7 @@ module Scanners state = :initial next end - + elsif state == :undef_comma_expected if match = scan(/,/) type = :operator @@ -377,7 +377,7 @@ module Scanners raise_inspect 'Empty token', tokens unless match tokens << [match, type] - + if last_state state = last_state last_state = nil diff --git a/lib/coderay/scanners/ruby/patterns.rb b/lib/coderay/scanners/ruby/patterns.rb index c38739d..b1e0d1b 100644 --- a/lib/coderay/scanners/ruby/patterns.rb +++ b/lib/coderay/scanners/ruby/patterns.rb @@ -25,7 +25,7 @@ module Scanners ] REGEXP_ALLOWED = WordList.new(false). add(IDENTS_ALLOWING_REGEXP, :set) - + PREDEFINED_CONSTANTS = %w[ nil true false self DATA ARGV ARGF __FILE__ __LINE__ @@ -60,7 +60,7 @@ module Scanners '/'=> :regexp, } QUOTE_TO_TYPE.default = :string - + REGEXP_MODIFIERS = /[mixounse]*/ REGEXP_SYMBOLS = /[|?*+?(){}\[\].^$]/ @@ -145,17 +145,17 @@ module Scanners } FancyStringType['w'] = FancyStringType['q'] FancyStringType['W'] = FancyStringType[''] = FancyStringType['Q'] - + class StringState < Struct.new :type, :interpreted, :delim, :heredoc, :paren, :paren_depth, :pattern, :next_state - + CLOSING_PAREN = Hash[ *%w[ ( ) [ ] < > { } ] ] - + CLOSING_PAREN.values.each { |o| o.freeze } # debug, if I try to change it with << OPENING_PAREN = CLOSING_PAREN.invert @@ -166,8 +166,8 @@ module Scanners delim_pattern << Regexp.escape(closing_paren) end - - special_escapes = + + special_escapes = case interpreted when :regexp_symbols '| ' + REGEXP_SYMBOLS.source diff --git a/lib/coderay/scanners/xml.rb b/lib/coderay/scanners/xml.rb index d1c6bad..5ce8ce9 100644 --- a/lib/coderay/scanners/xml.rb +++ b/lib/coderay/scanners/xml.rb @@ -1,18 +1,18 @@ module CodeRay module Scanners - load :html - - # XML Scanner - # - # $Id$ - # - # Currently this is the same scanner as Scanners::HTML. - class XML < HTML + load :html - register_for :xml + # XML Scanner + # + # $Id$ + # + # Currently this is the same scanner as Scanners::HTML. + class XML < HTML - end + register_for :xml + + end end end diff --git a/lib/coderay/style.rb b/lib/coderay/style.rb index d0eb7ae..057f8d4 100644 --- a/lib/coderay/style.rb +++ b/lib/coderay/style.rb @@ -1,20 +1,20 @@ module CodeRay - # This module holds the Style class and its subclasses. - # - # See Plugin. - module Styles - extend PluginHost - plugin_path File.dirname(__FILE__), 'styles' + # This module holds the Style class and its subclasses. + # + # See Plugin. + module Styles + extend PluginHost + plugin_path File.dirname(__FILE__), 'styles' - class Style - extend Plugin - plugin_host Styles + class Style + extend Plugin + plugin_host Styles - DEFAULT_OPTIONS = { } + DEFAULT_OPTIONS = { } - end + end - end + end end diff --git a/lib/coderay/styles/_map.rb b/lib/coderay/styles/_map.rb index 25687d6..4420470 100644 --- a/lib/coderay/styles/_map.rb +++ b/lib/coderay/styles/_map.rb @@ -1,7 +1,7 @@ module CodeRay module Styles - - default :cycnus - + + default :cycnus + end end diff --git a/lib/coderay/styles/cycnus.rb b/lib/coderay/styles/cycnus.rb index 5037044..c65ab91 100644 --- a/lib/coderay/styles/cycnus.rb +++ b/lib/coderay/styles/cycnus.rb @@ -1,21 +1,21 @@ module CodeRay module Styles - class Cycnus < Style + class Cycnus < Style - register_for :cycnus + register_for :cycnus - code_background = '#f8f8f8' - numbers_background = '#def' - border_color = 'silver' - normal_color = '#100' + code_background = '#f8f8f8' + numbers_background = '#def' + border_color = 'silver' + normal_color = '#100' - CSS_MAIN_STYLES = <<-MAIN + CSS_MAIN_STYLES = <<-MAIN .CodeRay { - background-color: #{code_background}; - border: 1px solid #{border_color}; - font-family: 'Courier New', 'Terminal', monospace; - color: #{normal_color}; + background-color: #{code_background}; + border: 1px solid #{border_color}; + font-family: 'Courier New', 'Terminal', monospace; + color: #{normal_color}; } .CodeRay pre { margin: 0px } @@ -27,9 +27,9 @@ table.CodeRay { border-collapse: collapse; width: 100%; padding: 2px } table.CodeRay td { padding: 2px 4px; vertical-align: top } .CodeRay .line_numbers, .CodeRay .no { - background-color: #{numbers_background}; - color: gray; - text-align: right; + background-color: #{numbers_background}; + color: gray; + text-align: right; } .CodeRay .line_numbers tt { font-weight: bold } .CodeRay .no { padding: 0px 4px } @@ -39,9 +39,9 @@ ol.CodeRay { font-size: 10pt } ol.CodeRay li { white-space: pre } .CodeRay .code pre { overflow: auto } - MAIN + MAIN - TOKEN_COLORS = <<-'TOKENS' + TOKEN_COLORS = <<-'TOKENS' .af { color:#00C } .an { color:#007 } .av { color:#700 } @@ -117,9 +117,9 @@ ol.CodeRay li { white-space: pre } .ty { color:#339; font-weight:bold } .v { color:#036 } .xt { color:#444 } - TOKENS + TOKENS - end + end end end diff --git a/lib/coderay/styles/murphy.rb b/lib/coderay/styles/murphy.rb index 9377a10..f2fa798 100644 --- a/lib/coderay/styles/murphy.rb +++ b/lib/coderay/styles/murphy.rb @@ -1,21 +1,21 @@ module CodeRay module Styles - class Murphy < Style + class Murphy < Style - register_for :murphy + register_for :murphy - code_background = '#001129' - numbers_background = code_background - border_color = 'silver' - normal_color = '#C0C0C0' + code_background = '#001129' + numbers_background = code_background + border_color = 'silver' + normal_color = '#C0C0C0' - CSS_MAIN_STYLES = <<-MAIN + CSS_MAIN_STYLES = <<-MAIN .CodeRay { - background-color: #{code_background}; - border: 1px solid #{border_color}; - font-family: 'Courier New', 'Terminal', monospace; - color: #{normal_color}; + background-color: #{code_background}; + border: 1px solid #{border_color}; + font-family: 'Courier New', 'Terminal', monospace; + color: #{normal_color}; } .CodeRay pre { margin: 0px; } @@ -27,9 +27,9 @@ table.CodeRay { border-collapse: collapse; width: 100%; padding: 2px; } table.CodeRay td { padding: 2px 4px; vertical-align: top; } .CodeRay .line_numbers, .CodeRay .no { - background-color: #{numbers_background}; - color: gray; - text-align: right; + background-color: #{numbers_background}; + color: gray; + text-align: right; } .CodeRay .line_numbers tt { font-weight: bold; } .CodeRay .no { padding: 0px 4px; } @@ -39,9 +39,9 @@ ol.CodeRay { font-size: 10pt; } ol.CodeRay li { white-space: pre; } .CodeRay .code pre { overflow: auto; } - MAIN + MAIN - TOKEN_COLORS = <<-'TOKENS' + TOKEN_COLORS = <<-'TOKENS' .af { color:#00C; } .an { color:#007; } .av { color:#700; } @@ -111,9 +111,9 @@ ol.CodeRay li { white-space: pre; } .ty { color:#339; font-weight:bold; } .v { color:#036; } .xt { color:#444; } - TOKENS + TOKENS - end + end end end diff --git a/lib/coderay/tokens.rb b/lib/coderay/tokens.rb index c7ca356..8b8c692 100644 --- a/lib/coderay/tokens.rb +++ b/lib/coderay/tokens.rb @@ -1,322 +1,322 @@ module CodeRay - # = Tokens - # - # The Tokens class represents a list of tokens returnd from - # a Scanner. - # - # A token is not a special object, just a two-element Array - # consisting of - # * the _token_ _kind_ (a Symbol representing the type of the token) - # * the _token_ _text_ (the original source of the token in a String) - # - # A token looks like this: - # - # [:comment, '# It looks like this'] - # [:float, '3.1415926'] - # [:error, 'äöü'] - # - # Some scanners also yield some kind of sub-tokens, represented by special - # token texts, namely :open and :close . - # - # The Ruby scanner, for example, splits "a string" into: - # - # [ - # [:open, :string], - # [:delimiter, '"'], - # [:content, 'a string'], - # [:delimiter, '"'], - # [:close, :string] - # ] - # - # Tokens is also the interface between Scanners and Encoders: - # The input is split and saved into a Tokens object. The Encoder - # then builds the output from this object. - # - # Thus, the syntax below becomes clear: - # - # CodeRay.scan('price = 2.59', :ruby).html - # # the Tokens object is here -------^ - # - # See how small it is? ;) - # - # Tokens gives you the power to handle pre-scanned code very easily: - # You can convert it to a webpage, a YAML file, or dump it into a gzip'ed string - # that you put in your DB. - # - # Tokens' subclass TokenStream allows streaming to save memory. - class Tokens < Array - - class << self - - # Convert the token to a string. - # - # This format is used by Encoders.Tokens. - # It can be reverted using read_token. - def write_token text, type - if text.is_a? String - "#{type}\t#{escape(text)}\n" - else - ":#{text}\t#{type}\t\n" - end - end - - # Read a token from the string. - # - # Inversion of write_token. - # - # TODO Test this! - def read_token token - type, text = token.split("\t", 2) - if type[0] == ?: - [text.to_sym, type[1..-1].to_sym] - else - [type.to_sym, unescape(text)] - end - end - - # Escapes a string for use in write_token. - def escape text - text.gsub(/[\n\\]/, '\\\\\&') - end - - # Unescapes a string created by escape. - def unescape text - text.gsub(/\\[\n\\]/) { |m| m[1,1] } - end - - end - - # Whether the object is a TokenStream. - # - # Returns false. - def stream? - false - end - - # Iterates over all tokens. - # - # If a filter is given, only tokens of that kind are yielded. - def each kind_filter = nil, &block - unless kind_filter - super(&block) - else - super() do |text, kind| - next unless kind == kind_filter - yield text, kind - end - end - end - - # Iterates over all text tokens. - # Range tokens like [:open, :string] are left out. - # - # Example: - # tokens.each_text_token { |text, kind| text.replace html_escape(text) } - def each_text_token - each do |text, kind| - next unless text.respond_to? :to_str - yield text, kind - end - end - - # Encode the tokens using encoder. - # - # encoder can be - # * a symbol like :html oder :statistic - # * an Encoder class - # * an Encoder object - # - # options are passed to the encoder. - def encode encoder, options = {} - unless encoder.is_a? Encoders::Encoder - unless encoder.is_a? Class - encoder_class = Encoders[encoder] - end - encoder = encoder_class.new options - end - encoder.encode_tokens self, options - end - - - # Turn into a string using Encoders::Text. - # - # +options+ are passed to the encoder if given. - def to_s options = {} - encode :text, options - end - - - # Redirects unknown methods to encoder calls. - # - # For example, if you call +tokens.html+, the HTML encoder - # is used to highlight the tokens. - def method_missing meth, options = {} - Encoders[meth].new(options).encode_tokens self - end - - # Returns the tokens compressed by joining consecutive - # tokens of the same kind. - # - # This can not be undone, but should yield the same output - # in most Encoders. It basically makes the output smaller. - # - # Combined with dump, it saves space for the cost of time. - # - # If the scanner is written carefully, this is not required - - # for example, consecutive //-comment lines could already be - # joined in one comment token by the Scanner. - def optimize - print ' Tokens#optimize: before: %d - ' % size if $DEBUG - last_kind = last_text = nil - new = self.class.new - each do |text, kind| - if text.is_a? String - if kind == last_kind - last_text << text - else - new << [last_text, last_kind] if last_kind - last_text = text - last_kind = kind - end - else - new << [last_text, last_kind] if last_kind - last_kind = last_text = nil - new << [text, kind] - end - end - new << [last_text, last_kind] if last_kind - print 'after: %d (%d saved = %2.0f%%)' % - [new.size, size - new.size, 1.0 - (new.size.to_f / size)] if $DEBUG - new - end - - # Compact the object itself; see optimize. - def optimize! - replace optimize - end - - # Dumps the object into a String that can be saved - # in files or databases. - # - # The dump is created with Marshal.dump; - # In addition, it is gzipped using GZip.gzip. - # - # The returned String object includes Undumping - # so it has an #undump method. See Tokens.load. - # - # You can configure the level of compression, - # but the default value 7 should be what you want - # in most cases as it is a good comprimise between - # speed and compression rate. - # - # See GZip module. - def dump gzip_level = 7 - require 'coderay/helpers/gzip_simple' - dump = Marshal.dump self - dump = dump.gzip gzip_level - dump.extend Undumping - end - - # The total size of the tokens. - # Should be equal to the input size before - # scanning. - def text_size - map { |t, k| t }.join.size - end - - # Include this module to give an object an #undump - # method. - # - # The string returned by Tokens.dump includes Undumping. - module Undumping - # Calls Tokens.load with itself. - def undump - Tokens.load self - end - end - - # Undump the object using Marshal.load, then - # unzip it using GZip.gunzip. - # - # The result is commonly a Tokens object, but - # this is not guaranteed. - def Tokens.load dump - require 'coderay/helpers/gzip_simple' - dump = dump.gunzip - @dump = Marshal.load dump - end - - end - - - # = TokenStream - # - # The TokenStream class is a fake Array without elements. - # - # It redirects the method << to a block given at creation. - # - # This allows scanners and Encoders to use streaming (no - # tokens are saved, the input is highlighted the same time it - # is scanned) with the same code. - # - # See CodeRay.encode_stream and CodeRay.scan_stream - class TokenStream < Tokens - - # Whether the object is a TokenStream. - # - # Returns true. - def stream? - true - end - - # The Array is empty, but size counts the tokens given by <<. - attr_reader :size - - # Creates a new TokenStream that calls +block+ whenever - # its << method is called. - # - # Example: - # - # require 'coderay' - # - # token_stream = CodeRay::TokenStream.new do |kind, text| - # puts 'kind: %s, text size: %d.' % [kind, text.size] - # end - # - # token_stream << [:regexp, '/\d+/'] - # #-> kind: rexpexp, text size: 5. - # - def initialize &block - raise ArgumentError, 'Block expected for streaming.' unless block - @callback = block - @size = 0 - end - - # Calls +block+ with +token+ and increments size. - # - # Returns self. - def << token - @callback.call token - @size += 1 - self - end - - # This method is not implemented due to speed reasons. Use Tokens. - def text_size - raise NotImplementedError, 'This method is not implemented due to speed reasons.' - end - - # A TokenStream cannot be dumped. Use Tokens. - def dump - raise NotImplementedError, 'A TokenStream cannot be dumped.' - end - - # A TokenStream cannot be optimized. Use Tokens. - def optimize - raise NotImplementedError, 'A TokenStream cannot be optimized.' - end - - end + # = Tokens + # + # The Tokens class represents a list of tokens returnd from + # a Scanner. + # + # A token is not a special object, just a two-element Array + # consisting of + # * the _token_ _kind_ (a Symbol representing the type of the token) + # * the _token_ _text_ (the original source of the token in a String) + # + # A token looks like this: + # + # [:comment, '# It looks like this'] + # [:float, '3.1415926'] + # [:error, 'äöü'] + # + # Some scanners also yield some kind of sub-tokens, represented by special + # token texts, namely :open and :close . + # + # The Ruby scanner, for example, splits "a string" into: + # + # [ + # [:open, :string], + # [:delimiter, '"'], + # [:content, 'a string'], + # [:delimiter, '"'], + # [:close, :string] + # ] + # + # Tokens is also the interface between Scanners and Encoders: + # The input is split and saved into a Tokens object. The Encoder + # then builds the output from this object. + # + # Thus, the syntax below becomes clear: + # + # CodeRay.scan('price = 2.59', :ruby).html + # # the Tokens object is here -------^ + # + # See how small it is? ;) + # + # Tokens gives you the power to handle pre-scanned code very easily: + # You can convert it to a webpage, a YAML file, or dump it into a gzip'ed string + # that you put in your DB. + # + # Tokens' subclass TokenStream allows streaming to save memory. + class Tokens < Array + + class << self + + # Convert the token to a string. + # + # This format is used by Encoders.Tokens. + # It can be reverted using read_token. + def write_token text, type + if text.is_a? String + "#{type}\t#{escape(text)}\n" + else + ":#{text}\t#{type}\t\n" + end + end + + # Read a token from the string. + # + # Inversion of write_token. + # + # TODO Test this! + def read_token token + type, text = token.split("\t", 2) + if type[0] == ?: + [text.to_sym, type[1..-1].to_sym] + else + [type.to_sym, unescape(text)] + end + end + + # Escapes a string for use in write_token. + def escape text + text.gsub(/[\n\\]/, '\\\\\&') + end + + # Unescapes a string created by escape. + def unescape text + text.gsub(/\\[\n\\]/) { |m| m[1,1] } + end + + end + + # Whether the object is a TokenStream. + # + # Returns false. + def stream? + false + end + + # Iterates over all tokens. + # + # If a filter is given, only tokens of that kind are yielded. + def each kind_filter = nil, &block + unless kind_filter + super(&block) + else + super() do |text, kind| + next unless kind == kind_filter + yield text, kind + end + end + end + + # Iterates over all text tokens. + # Range tokens like [:open, :string] are left out. + # + # Example: + # tokens.each_text_token { |text, kind| text.replace html_escape(text) } + def each_text_token + each do |text, kind| + next unless text.respond_to? :to_str + yield text, kind + end + end + + # Encode the tokens using encoder. + # + # encoder can be + # * a symbol like :html oder :statistic + # * an Encoder class + # * an Encoder object + # + # options are passed to the encoder. + def encode encoder, options = {} + unless encoder.is_a? Encoders::Encoder + unless encoder.is_a? Class + encoder_class = Encoders[encoder] + end + encoder = encoder_class.new options + end + encoder.encode_tokens self, options + end + + + # Turn into a string using Encoders::Text. + # + # +options+ are passed to the encoder if given. + def to_s options = {} + encode :text, options + end + + + # Redirects unknown methods to encoder calls. + # + # For example, if you call +tokens.html+, the HTML encoder + # is used to highlight the tokens. + def method_missing meth, options = {} + Encoders[meth].new(options).encode_tokens self + end + + # Returns the tokens compressed by joining consecutive + # tokens of the same kind. + # + # This can not be undone, but should yield the same output + # in most Encoders. It basically makes the output smaller. + # + # Combined with dump, it saves space for the cost of time. + # + # If the scanner is written carefully, this is not required - + # for example, consecutive //-comment lines could already be + # joined in one comment token by the Scanner. + def optimize + print ' Tokens#optimize: before: %d - ' % size if $DEBUG + last_kind = last_text = nil + new = self.class.new + each do |text, kind| + if text.is_a? String + if kind == last_kind + last_text << text + else + new << [last_text, last_kind] if last_kind + last_text = text + last_kind = kind + end + else + new << [last_text, last_kind] if last_kind + last_kind = last_text = nil + new << [text, kind] + end + end + new << [last_text, last_kind] if last_kind + print 'after: %d (%d saved = %2.0f%%)' % + [new.size, size - new.size, 1.0 - (new.size.to_f / size)] if $DEBUG + new + end + + # Compact the object itself; see optimize. + def optimize! + replace optimize + end + + # Dumps the object into a String that can be saved + # in files or databases. + # + # The dump is created with Marshal.dump; + # In addition, it is gzipped using GZip.gzip. + # + # The returned String object includes Undumping + # so it has an #undump method. See Tokens.load. + # + # You can configure the level of compression, + # but the default value 7 should be what you want + # in most cases as it is a good comprimise between + # speed and compression rate. + # + # See GZip module. + def dump gzip_level = 7 + require 'coderay/helpers/gzip_simple' + dump = Marshal.dump self + dump = dump.gzip gzip_level + dump.extend Undumping + end + + # The total size of the tokens. + # Should be equal to the input size before + # scanning. + def text_size + map { |t, k| t }.join.size + end + + # Include this module to give an object an #undump + # method. + # + # The string returned by Tokens.dump includes Undumping. + module Undumping + # Calls Tokens.load with itself. + def undump + Tokens.load self + end + end + + # Undump the object using Marshal.load, then + # unzip it using GZip.gunzip. + # + # The result is commonly a Tokens object, but + # this is not guaranteed. + def Tokens.load dump + require 'coderay/helpers/gzip_simple' + dump = dump.gunzip + @dump = Marshal.load dump + end + + end + + + # = TokenStream + # + # The TokenStream class is a fake Array without elements. + # + # It redirects the method << to a block given at creation. + # + # This allows scanners and Encoders to use streaming (no + # tokens are saved, the input is highlighted the same time it + # is scanned) with the same code. + # + # See CodeRay.encode_stream and CodeRay.scan_stream + class TokenStream < Tokens + + # Whether the object is a TokenStream. + # + # Returns true. + def stream? + true + end + + # The Array is empty, but size counts the tokens given by <<. + attr_reader :size + + # Creates a new TokenStream that calls +block+ whenever + # its << method is called. + # + # Example: + # + # require 'coderay' + # + # token_stream = CodeRay::TokenStream.new do |kind, text| + # puts 'kind: %s, text size: %d.' % [kind, text.size] + # end + # + # token_stream << [:regexp, '/\d+/'] + # #-> kind: rexpexp, text size: 5. + # + def initialize &block + raise ArgumentError, 'Block expected for streaming.' unless block + @callback = block + @size = 0 + end + + # Calls +block+ with +token+ and increments size. + # + # Returns self. + def << token + @callback.call token + @size += 1 + self + end + + # This method is not implemented due to speed reasons. Use Tokens. + def text_size + raise NotImplementedError, 'This method is not implemented due to speed reasons.' + end + + # A TokenStream cannot be dumped. Use Tokens. + def dump + raise NotImplementedError, 'A TokenStream cannot be dumped.' + end + + # A TokenStream cannot be optimized. Use Tokens. + def optimize + raise NotImplementedError, 'A TokenStream cannot be optimized.' + end + + end end -- cgit v1.2.1