summary | refs | log | tree | commit | diff
path: root/lib/coderay
diff options
context:
space:
mode:
author: murphy <murphy@rubychan.de> 2006-07-11 05:40:21 +0000
committer: murphy <murphy@rubychan.de> 2006-07-11 05:40:21 +0000
commit: 7bb2aef0553091a10c197e302475c9f14de8a860 (patch)
tree: 72ea9444276fe97dc99ae82aa9e46070ec7ba9ea /lib/coderay
parent: 26a8e5a0388199ac686db28d631b05a5b5aa02e1 (diff)
download: coderay-7bb2aef0553091a10c197e302475c9f14de8a860.tar.gz
rake test now runs in debug mode.
All .rb files converted to UNIX format (where did the \r come from?)
Diffstat (limited to 'lib/coderay')
-rw-r--r-- lib/coderay/duo.rb 58
-rw-r--r-- lib/coderay/encoder.rb 346
-rw-r--r-- lib/coderay/encoders/_map.rb 16
-rw-r--r-- lib/coderay/encoders/count.rb 42
-rw-r--r-- lib/coderay/encoders/debug.rb 92
-rw-r--r-- lib/coderay/encoders/div.rb 40
-rw-r--r-- lib/coderay/encoders/html/classes.rb 146
-rw-r--r-- lib/coderay/encoders/html/css.rb 130
-rw-r--r-- lib/coderay/encoders/html/numerization.rb 244
-rw-r--r-- lib/coderay/encoders/html/output.rb 390
-rw-r--r-- lib/coderay/encoders/null.rb 52
-rw-r--r-- lib/coderay/encoders/page.rb 42
-rw-r--r-- lib/coderay/encoders/span.rb 40
-rw-r--r-- lib/coderay/encoders/statistic.rb 162
-rw-r--r-- lib/coderay/encoders/text.rb 66
-rw-r--r-- lib/coderay/encoders/tokens.rb 88
-rw-r--r-- lib/coderay/encoders/xml.rb 142
-rw-r--r-- lib/coderay/encoders/yaml.rb 44
-rw-r--r-- lib/coderay/scanners/plaintext.rb 30
-rw-r--r-- lib/coderay/scanners/ruby/patterns.rb 432
-rw-r--r-- lib/coderay/scanners/xml.rb 36
-rw-r--r-- lib/coderay/style.rb 40
-rw-r--r-- lib/coderay/tokens.rb 644
23 files changed, 1661 insertions, 1661 deletions
diff --git a/lib/coderay/duo.rb b/lib/coderay/duo.rb
index 3125568..0e5956e 100644
--- a/lib/coderay/duo.rb
+++ b/lib/coderay/duo.rb
@@ -1,29 +1,29 @@
-module CodeRay
-
- # = Duo
- #
- # $Id: scanner.rb 123 2006-03-21 14:46:34Z murphy $
- #
- # TODO: Doc.
- class Duo
-
- attr_accessor :scanner, :encoder
-
- def initialize lang, format, options = {}
- @scanner = CodeRay.scanner lang, CodeRay.get_scanner_options(options)
- @encoder = CodeRay.encoder format, options
- end
-
- class << self
- alias [] new
- end
-
- def encode code
- @scanner.string = code
- @encoder.encode_tokens(scanner.tokenize)
- end
- alias highlight encode
-
- end
-
-end
+module CodeRay
+
+ # = Duo
+ #
+ # $Id: scanner.rb 123 2006-03-21 14:46:34Z murphy $
+ #
+ # TODO: Doc.
+ class Duo
+
+ attr_accessor :scanner, :encoder
+
+ def initialize lang, format, options = {}
+ @scanner = CodeRay.scanner lang, CodeRay.get_scanner_options(options)
+ @encoder = CodeRay.encoder format, options
+ end
+
+ class << self
+ alias [] new
+ end
+
+ def encode code
+ @scanner.string = code
+ @encoder.encode_tokens(scanner.tokenize)
+ end
+ alias highlight encode
+
+ end
+
+end
diff --git a/lib/coderay/encoder.rb b/lib/coderay/encoder.rb
index 1065a9c..221cd58 100644
--- a/lib/coderay/encoder.rb
+++ b/lib/coderay/encoder.rb
@@ -1,173 +1,173 @@
-module CodeRay
-
- # This module holds the Encoder class and its subclasses.
- # For example, the HTML encoder is named CodeRay::Encoders::HTML
- # can be found in coderay/encoders/html.
- #
- # Encoders also provides methods and constants for the register
- # mechanism and the [] method that returns the Encoder class
- # belonging to the given format.
- module Encoders
- extend PluginHost
- plugin_path File.dirname(__FILE__), 'encoders'
-
- # = Encoder
- #
- # The Encoder base class. Together with Scanner and
- # Tokens, it forms the highlighting triad.
- #
- # Encoder instances take a Tokens object and do something with it.
- #
- # The most common Encoder is surely the HTML encoder
- # (CodeRay::Encoders::HTML). It highlights the code in a colorful
- # html page.
- # If you want the highlighted code in a div or a span instead,
- # use its subclasses Div and Span.
- class Encoder
- extend Plugin
- plugin_host Encoders
-
- attr_reader :token_stream
-
- class << self
-
- # Returns if the Encoder can be used in streaming mode.
- def streamable?
- is_a? Streamable
- end
-
- # If FILE_EXTENSION isn't defined, this method returns the
- # downcase class name instead.
- def const_missing sym
- if sym == :FILE_EXTENSION
- sym.to_s.downcase
- else
- super
- end
- end
-
- end
-
- # Subclasses are to store their default options in this constant.
- DEFAULT_OPTIONS = { :stream => false }
-
- # The options you gave the Encoder at creating.
- attr_accessor :options
-
- # Creates a new Encoder.
- # +options+ is saved and used for all encode operations, as long
- # as you don't overwrite it there by passing additional options.
- #
- # Encoder objects provide three encode methods:
- # - encode simply takes a +code+ string and a +lang+
- # - encode_tokens expects a +tokens+ object instead
- # - encode_stream is like encode, but uses streaming mode.
- #
- # Each method has an optional +options+ parameter. These are
- # added to the options you passed at creation.
- def initialize options = {}
- @options = self.class::DEFAULT_OPTIONS.merge options
- raise "I am only the basic Encoder class. I can't encode "\
- "anything. :( Use my subclasses." if self.class == Encoder
- end
-
- # Encode a Tokens object.
- def encode_tokens tokens, options = {}
- options = @options.merge options
- setup options
- compile tokens, options
- finish options
- end
-
- # Encode the given +code+ after tokenizing it using the Scanner
- # for +lang+.
- def encode code, lang, options = {}
- options = @options.merge options
- scanner_options = CodeRay.get_scanner_options(options)
- tokens = CodeRay.scan code, lang, scanner_options
- encode_tokens tokens, options
- end
-
- # You can use highlight instead of encode, if that seems
- # more clear to you.
- alias highlight encode
-
- # Encode the given +code+ using the Scanner for +lang+ in
- # streaming mode.
- def encode_stream code, lang, options = {}
- raise NotStreamableError, self unless kind_of? Streamable
- options = @options.merge options
- setup options
- scanner_options = CodeRay.get_scanner_options options
- @token_stream =
- CodeRay.scan_stream code, lang, scanner_options, &self
- finish options
- end
-
- # Behave like a proc. The token method is converted to a proc.
- def to_proc
- method(:token).to_proc
- end
-
- # Return the default file extension for outputs of this encoder.
- def file_extension
- self.class::FILE_EXTENSION
- end
-
- protected
-
- # Called with merged options before encoding starts.
- # Sets @out to an empty string.
- #
- # See the HTML Encoder for an example of option caching.
- def setup options
- @out = ''
- end
-
- # Called with +text+ and +kind+ of the currently scanned token.
- # For simple scanners, it's enougth to implement this method.
- #
- # By default, it calls text_token or block_token, depending on
- # whether +text+ is a String.
- def token text, kind
- if text.is_a? ::String
- text_token text, kind
- elsif text.is_a? ::Symbol
- block_token text, kind
- else
- raise 'Unknown token text type: %p' % text
- end
- end
-
- def text_token text, kind
- end
-
- def block_token action, kind
- case action
- when :open
- open_token kind
- when :close
- close_token kind
- else
- raise 'unknown block action: %p' % action
- end
- end
-
- # Called with merged options after encoding starts.
- # The return value is the result of encoding, typically @out.
- def finish options
- @out
- end
-
- # Do the encoding.
- #
- # The already created +tokens+ object must be used; it can be a
- # TokenStream or a Tokens object.
- def compile tokens, options
- tokens.each(&self)
- end
-
- end
-
- end
-end
+module CodeRay
+
+ # This module holds the Encoder class and its subclasses.
+ # For example, the HTML encoder is named CodeRay::Encoders::HTML
+ # can be found in coderay/encoders/html.
+ #
+ # Encoders also provides methods and constants for the register
+ # mechanism and the [] method that returns the Encoder class
+ # belonging to the given format.
+ module Encoders
+ extend PluginHost
+ plugin_path File.dirname(__FILE__), 'encoders'
+
+ # = Encoder
+ #
+ # The Encoder base class. Together with Scanner and
+ # Tokens, it forms the highlighting triad.
+ #
+ # Encoder instances take a Tokens object and do something with it.
+ #
+ # The most common Encoder is surely the HTML encoder
+ # (CodeRay::Encoders::HTML). It highlights the code in a colorful
+ # html page.
+ # If you want the highlighted code in a div or a span instead,
+ # use its subclasses Div and Span.
+ class Encoder
+ extend Plugin
+ plugin_host Encoders
+
+ attr_reader :token_stream
+
+ class << self
+
+ # Returns if the Encoder can be used in streaming mode.
+ def streamable?
+ is_a? Streamable
+ end
+
+ # If FILE_EXTENSION isn't defined, this method returns the
+ # downcase class name instead.
+ def const_missing sym
+ if sym == :FILE_EXTENSION
+ sym.to_s.downcase
+ else
+ super
+ end
+ end
+
+ end
+
+ # Subclasses are to store their default options in this constant.
+ DEFAULT_OPTIONS = { :stream => false }
+
+ # The options you gave the Encoder at creating.
+ attr_accessor :options
+
+ # Creates a new Encoder.
+ # +options+ is saved and used for all encode operations, as long
+ # as you don't overwrite it there by passing additional options.
+ #
+ # Encoder objects provide three encode methods:
+ # - encode simply takes a +code+ string and a +lang+
+ # - encode_tokens expects a +tokens+ object instead
+ # - encode_stream is like encode, but uses streaming mode.
+ #
+ # Each method has an optional +options+ parameter. These are
+ # added to the options you passed at creation.
+ def initialize options = {}
+ @options = self.class::DEFAULT_OPTIONS.merge options
+ raise "I am only the basic Encoder class. I can't encode "\
+ "anything. :( Use my subclasses." if self.class == Encoder
+ end
+
+ # Encode a Tokens object.
+ def encode_tokens tokens, options = {}
+ options = @options.merge options
+ setup options
+ compile tokens, options
+ finish options
+ end
+
+ # Encode the given +code+ after tokenizing it using the Scanner
+ # for +lang+.
+ def encode code, lang, options = {}
+ options = @options.merge options
+ scanner_options = CodeRay.get_scanner_options(options)
+ tokens = CodeRay.scan code, lang, scanner_options
+ encode_tokens tokens, options
+ end
+
+ # You can use highlight instead of encode, if that seems
+ # more clear to you.
+ alias highlight encode
+
+ # Encode the given +code+ using the Scanner for +lang+ in
+ # streaming mode.
+ def encode_stream code, lang, options = {}
+ raise NotStreamableError, self unless kind_of? Streamable
+ options = @options.merge options
+ setup options
+ scanner_options = CodeRay.get_scanner_options options
+ @token_stream =
+ CodeRay.scan_stream code, lang, scanner_options, &self
+ finish options
+ end
+
+ # Behave like a proc. The token method is converted to a proc.
+ def to_proc
+ method(:token).to_proc
+ end
+
+ # Return the default file extension for outputs of this encoder.
+ def file_extension
+ self.class::FILE_EXTENSION
+ end
+
+ protected
+
+ # Called with merged options before encoding starts.
+ # Sets @out to an empty string.
+ #
+ # See the HTML Encoder for an example of option caching.
+ def setup options
+ @out = ''
+ end
+
+ # Called with +text+ and +kind+ of the currently scanned token.
+ # For simple scanners, it's enougth to implement this method.
+ #
+ # By default, it calls text_token or block_token, depending on
+ # whether +text+ is a String.
+ def token text, kind
+ if text.is_a? ::String
+ text_token text, kind
+ elsif text.is_a? ::Symbol
+ block_token text, kind
+ else
+ raise 'Unknown token text type: %p' % text
+ end
+ end
+
+ def text_token text, kind
+ end
+
+ def block_token action, kind
+ case action
+ when :open
+ open_token kind
+ when :close
+ close_token kind
+ else
+ raise 'unknown block action: %p' % action
+ end
+ end
+
+ # Called with merged options after encoding starts.
+ # The return value is the result of encoding, typically @out.
+ def finish options
+ @out
+ end
+
+ # Do the encoding.
+ #
+ # The already created +tokens+ object must be used; it can be a
+ # TokenStream or a Tokens object.
+ def compile tokens, options
+ tokens.each(&self)
+ end
+
+ end
+
+ end
+end
diff --git a/lib/coderay/encoders/_map.rb b/lib/coderay/encoders/_map.rb
index a22a951..fdd8ae4 100644
--- a/lib/coderay/encoders/_map.rb
+++ b/lib/coderay/encoders/_map.rb
@@ -1,8 +1,8 @@
-module CodeRay
-module Encoders
-
- map :stats => :statistic,
- :plain => :text
-
-end
-end
+module CodeRay
+module Encoders
+
+ map :stats => :statistic,
+ :plain => :text
+
+end
+end
diff --git a/lib/coderay/encoders/count.rb b/lib/coderay/encoders/count.rb
index 6885541..c9a6dfd 100644
--- a/lib/coderay/encoders/count.rb
+++ b/lib/coderay/encoders/count.rb
@@ -1,21 +1,21 @@
-module CodeRay
-module Encoders
-
- class Count < Encoder
-
- include Streamable
- register_for :count
-
- protected
-
- def setup options
- @out = 0
- end
-
- def token text, kind
- @out += 1
- end
- end
-
-end
-end
+module CodeRay
+module Encoders
+
+ class Count < Encoder
+
+ include Streamable
+ register_for :count
+
+ protected
+
+ def setup options
+ @out = 0
+ end
+
+ def token text, kind
+ @out += 1
+ end
+ end
+
+end
+end
diff --git a/lib/coderay/encoders/debug.rb b/lib/coderay/encoders/debug.rb
index 2639e1f..eb9eaa4 100644
--- a/lib/coderay/encoders/debug.rb
+++ b/lib/coderay/encoders/debug.rb
@@ -1,46 +1,46 @@
-module CodeRay
-module Encoders
-
- # = Debug Encoder
- #
- # Fast encoder producing simple debug output.
- #
- # It is readable and diff-able and is used for testing.
- #
- # You cannot fully restore the tokens information from the
- # output, because consecutive :space tokens are merged.
- # Use Tokens#dump for caching purposes.
- class Debug < Encoder
-
- include Streamable
- register_for :debug
-
- FILE_EXTENSION = 'raydebug'
-
- protected
- def text_token text, kind
- @out <<
- if kind == :space
- text
- else
- text = text.gsub(/[)\\]/, '\\\\\0')
- "#{kind}(#{text})"
- end
- end
-
- def block_token action, kind
- @out << super
- end
-
- def open_token kind
- "#{kind}<"
- end
-
- def close_token kind
- ">"
- end
-
- end
-
-end
-end
+module CodeRay
+module Encoders
+
+ # = Debug Encoder
+ #
+ # Fast encoder producing simple debug output.
+ #
+ # It is readable and diff-able and is used for testing.
+ #
+ # You cannot fully restore the tokens information from the
+ # output, because consecutive :space tokens are merged.
+ # Use Tokens#dump for caching purposes.
+ class Debug < Encoder
+
+ include Streamable
+ register_for :debug
+
+ FILE_EXTENSION = 'raydebug'
+
+ protected
+ def text_token text, kind
+ @out <<
+ if kind == :space
+ text
+ else
+ text = text.gsub(/[)\\]/, '\\\\\0')
+ "#{kind}(#{text})"
+ end
+ end
+
+ def block_token action, kind
+ @out << super
+ end
+
+ def open_token kind
+ "#{kind}<"
+ end
+
+ def close_token kind
+ ">"
+ end
+
+ end
+
+end
+end
diff --git a/lib/coderay/encoders/div.rb b/lib/coderay/encoders/div.rb
index ce558f2..3d55415 100644
--- a/lib/coderay/encoders/div.rb
+++ b/lib/coderay/encoders/div.rb
@@ -1,20 +1,20 @@
-module CodeRay
-module Encoders
-
- load :html
-
- class Div < HTML
-
- FILE_EXTENSION = 'div.html'
-
- register_for :div
-
- DEFAULT_OPTIONS = HTML::DEFAULT_OPTIONS.merge({
- :css => :style,
- :wrap => :div,
- })
-
- end
-
-end
-end
+module CodeRay
+module Encoders
+
+ load :html
+
+ class Div < HTML
+
+ FILE_EXTENSION = 'div.html'
+
+ register_for :div
+
+ DEFAULT_OPTIONS = HTML::DEFAULT_OPTIONS.merge({
+ :css => :style,
+ :wrap => :div,
+ })
+
+ end
+
+end
+end
diff --git a/lib/coderay/encoders/html/classes.rb b/lib/coderay/encoders/html/classes.rb
index 8493fa0..ea15ca0 100644
--- a/lib/coderay/encoders/html/classes.rb
+++ b/lib/coderay/encoders/html/classes.rb
@@ -1,73 +1,73 @@
-module CodeRay
-module Encoders
-
- class HTML
-
- ClassOfKind = {
- :attribute_name => 'an',
- :attribute_name_fat => 'af',
- :attribute_value => 'av',
- :attribute_value_fat => 'aw',
- :bin => 'bi',
- :char => 'ch',
- :class => 'cl',
- :class_variable => 'cv',
- :color => 'cr',
- :comment => 'c',
- :constant => 'co',
- :content => 'k',
- :definition => 'df',
- :delimiter => 'dl',
- :directive => 'di',
- :doc => 'do',
- :doc_string => 'ds',
- :entity => 'en',
- :error => 'er',
- :escape => 'e',
- :exception => 'ex',
- :float => 'fl',
- :function => 'fu',
- :global_variable => 'gv',
- :hex => 'hx',
- :include => 'ic',
- :inline => 'il',
- :instance_variable => 'iv',
- :integer => 'i',
- :interpreted => 'in',
- :label => 'la',
- :local_variable => 'lv',
- :modifier => 'mod',
- :oct => 'oc',
- :operator_name => 'on',
- :pre_constant => 'pc',
- :pre_type => 'pt',
- :predefined => 'pd',
- :preprocessor => 'pp',
- :regexp => 'rx',
- :reserved => 'r',
- :shell => 'sh',
- :string => 's',
- :symbol => 'sy',
- :tag => 'ta',
- :tag_fat => 'tf',
- :tag_special => 'ts',
- :type => 'ty',
- :variable => 'v',
- :xml_text => 'xt',
-
- :ident => :NO_HIGHLIGHT, # 'id'
- #:operator => 'op',
- :operator => :NO_HIGHLIGHT, # 'op'
- :space => :NO_HIGHLIGHT, # 'sp'
- :plain => :NO_HIGHLIGHT,
- }
- ClassOfKind[:procedure] = ClassOfKind[:method] = ClassOfKind[:function]
- ClassOfKind[:open] = ClassOfKind[:close] = ClassOfKind[:delimiter]
- ClassOfKind[:nesting_delimiter] = ClassOfKind[:delimiter]
- ClassOfKind[:escape] = ClassOfKind[:delimiter]
- ClassOfKind.default = ClassOfKind[:error] or raise 'no class found for :error!'
-
- end
-
-end
-end
+module CodeRay
+module Encoders
+
+ class HTML
+
+ ClassOfKind = {
+ :attribute_name => 'an',
+ :attribute_name_fat => 'af',
+ :attribute_value => 'av',
+ :attribute_value_fat => 'aw',
+ :bin => 'bi',
+ :char => 'ch',
+ :class => 'cl',
+ :class_variable => 'cv',
+ :color => 'cr',
+ :comment => 'c',
+ :constant => 'co',
+ :content => 'k',
+ :definition => 'df',
+ :delimiter => 'dl',
+ :directive => 'di',
+ :doc => 'do',
+ :doc_string => 'ds',
+ :entity => 'en',
+ :error => 'er',
+ :escape => 'e',
+ :exception => 'ex',
+ :float => 'fl',
+ :function => 'fu',
+ :global_variable => 'gv',
+ :hex => 'hx',
+ :include => 'ic',
+ :inline => 'il',
+ :instance_variable => 'iv',
+ :integer => 'i',
+ :interpreted => 'in',
+ :label => 'la',
+ :local_variable => 'lv',
+ :modifier => 'mod',
+ :oct => 'oc',
+ :operator_name => 'on',
+ :pre_constant => 'pc',
+ :pre_type => 'pt',
+ :predefined => 'pd',
+ :preprocessor => 'pp',
+ :regexp => 'rx',
+ :reserved => 'r',
+ :shell => 'sh',
+ :string => 's',
+ :symbol => 'sy',
+ :tag => 'ta',
+ :tag_fat => 'tf',
+ :tag_special => 'ts',
+ :type => 'ty',
+ :variable => 'v',
+ :xml_text => 'xt',
+
+ :ident => :NO_HIGHLIGHT, # 'id'
+ #:operator => 'op',
+ :operator => :NO_HIGHLIGHT, # 'op'
+ :space => :NO_HIGHLIGHT, # 'sp'
+ :plain => :NO_HIGHLIGHT,
+ }
+ ClassOfKind[:procedure] = ClassOfKind[:method] = ClassOfKind[:function]
+ ClassOfKind[:open] = ClassOfKind[:close] = ClassOfKind[:delimiter]
+ ClassOfKind[:nesting_delimiter] = ClassOfKind[:delimiter]
+ ClassOfKind[:escape] = ClassOfKind[:delimiter]
+ ClassOfKind.default = ClassOfKind[:error] or raise 'no class found for :error!'
+
+ end
+
+end
+end
diff --git a/lib/coderay/encoders/html/css.rb b/lib/coderay/encoders/html/css.rb
index b76d682..d577602 100644
--- a/lib/coderay/encoders/html/css.rb
+++ b/lib/coderay/encoders/html/css.rb
@@ -1,65 +1,65 @@
-module CodeRay
-module Encoders
-
- class HTML
- class CSS
-
- attr :stylesheet
-
- def CSS.load_stylesheet style = nil
- CodeRay::Styles[style]
- end
-
- def initialize style = :default
- @classes = Hash.new
- style = CSS.load_stylesheet style
- @stylesheet = [
- style::CSS_MAIN_STYLES,
- style::TOKEN_COLORS.gsub(/^(?!$)/, '.CodeRay ')
- ].join("\n")
- parse style::TOKEN_COLORS
- end
-
- def [] *styles
- cl = @classes[styles.first]
- return '' unless cl
- style = ''
- 1.upto(styles.size) do |offset|
- break if style = cl[styles[offset .. -1]]
- end
- raise 'Style not found: %p' % [styles] if $DEBUG and style.empty?
- return style
- end
-
- private
-
- CSS_CLASS_PATTERN = /
- ( (?: # $1 = classes
- \s* \. [-\w]+
- )+ )
- \s* \{ \s*
- ( [^\}]+ )? # $2 = style
- \s* \} \s*
- |
- ( . ) # $3 = error
- /mx
- def parse stylesheet
- stylesheet.scan CSS_CLASS_PATTERN do |classes, style, error|
- raise "CSS parse error: '#{error.inspect}' not recognized" if error
- styles = classes.scan(/[-\w]+/)
- cl = styles.pop
- @classes[cl] ||= Hash.new
- @classes[cl][styles] = style.to_s.strip
- end
- end
-
- end
- end
-
-end
-end
-
-if $0 == __FILE__
- require 'pp'
- pp CodeRay::Encoders::HTML::CSS.new
-end
+module CodeRay
+module Encoders
+
+ class HTML
+ class CSS
+
+ attr :stylesheet
+
+ def CSS.load_stylesheet style = nil
+ CodeRay::Styles[style]
+ end
+
+ def initialize style = :default
+ @classes = Hash.new
+ style = CSS.load_stylesheet style
+ @stylesheet = [
+ style::CSS_MAIN_STYLES,
+ style::TOKEN_COLORS.gsub(/^(?!$)/, '.CodeRay ')
+ ].join("\n")
+ parse style::TOKEN_COLORS
+ end
+
+ def [] *styles
+ cl = @classes[styles.first]
+ return '' unless cl
+ style = ''
+ 1.upto(styles.size) do |offset|
+ break if style = cl[styles[offset .. -1]]
+ end
+ raise 'Style not found: %p' % [styles] if $DEBUG and style.empty?
+ return style
+ end
+
+ private
+
+ CSS_CLASS_PATTERN = /
+ ( (?: # $1 = classes
+ \s* \. [-\w]+
+ )+ )
+ \s* \{ \s*
+ ( [^\}]+ )? # $2 = style
+ \s* \} \s*
+ |
+ ( . ) # $3 = error
+ /mx
+ def parse stylesheet
+ stylesheet.scan CSS_CLASS_PATTERN do |classes, style, error|
+ raise "CSS parse error: '#{error.inspect}' not recognized" if error
+ styles = classes.scan(/[-\w]+/)
+ cl = styles.pop
+ @classes[cl] ||= Hash.new
+ @classes[cl][styles] = style.to_s.strip
+ end
+ end
+
+ end
+ end
+
+end
+end
+
+if $0 == __FILE__
+ require 'pp'
+ pp CodeRay::Encoders::HTML::CSS.new
+end
diff --git a/lib/coderay/encoders/html/numerization.rb b/lib/coderay/encoders/html/numerization.rb
index 2960f87..1e4a4ed 100644
--- a/lib/coderay/encoders/html/numerization.rb
+++ b/lib/coderay/encoders/html/numerization.rb
@@ -1,122 +1,122 @@
-module CodeRay
-module Encoders
-
- class HTML
-
- module Output
-
- def numerize *args
- clone.numerize!(*args)
- end
-
-=begin NUMERIZABLE_WRAPPINGS = {
- :table => [:div, :page, nil],
- :inline => :all,
- :list => [:div, :page, nil]
- }
- NUMERIZABLE_WRAPPINGS.default = :all
-=end
- def numerize! mode = :table, options = {}
- return self unless mode
-
- options = DEFAULT_OPTIONS.merge options
-
- start = options[:line_number_start]
- unless start.is_a? Integer
- raise ArgumentError, "Invalid value %p for :line_number_start; Integer expected." % start
- end
-
- #allowed_wrappings = NUMERIZABLE_WRAPPINGS[mode]
- #unless allowed_wrappings == :all or allowed_wrappings.include? options[:wrap]
- # raise ArgumentError, "Can't numerize, :wrap must be in %p, but is %p" % [NUMERIZABLE_WRAPPINGS, options[:wrap]]
- #end
-
- bold_every = options[:bold_every]
- bolding =
- if bold_every == false
- proc { |line| line.to_s }
- elsif bold_every.is_a? Integer
- raise ArgumentError, ":bolding can't be 0." if bold_every == 0
- proc do |line|
- if line % bold_every == 0
- "<strong>#{line}</strong>" # every bold_every-th number in bold
- else
- line.to_s
- end
- end
- else
- raise ArgumentError, 'Invalid value %p for :bolding; false or Integer expected.' % bold_every
- end
-
- case mode
- when :inline
- max_width = (start + line_count).to_s.size
- line = start
- gsub!(/^/) do
- line_number = bolding.call line
- indent = ' ' * (max_width - line.to_s.size)
- res = "<span class=\"no\">#{indent}#{line_number}</span> "
- line += 1
- res
- end
-
- when :table
- # This is really ugly.
- # Because even monospace fonts seem to have different heights when bold,
- # I make the newline bold, both in the code and the line numbers.
- # FIXME Still not working perfect for Mr. Internet Exploder
- # FIXME Firefox struggles with very long codes (> 200 lines)
- line_numbers = (start ... start + line_count).to_a.map(&bolding).join("\n")
- line_numbers << "\n" # also for Mr. MS Internet Exploder :-/
- line_numbers.gsub!(/\n/) { "<tt>\n</tt>" }
-
- line_numbers_table_tpl = TABLE.apply('LINE_NUMBERS', line_numbers)
- gsub!(/\n/) { "<tt>\n</tt>" }
- wrap_in! line_numbers_table_tpl
- @wrapped_in = :div
-
- when :list
- opened_tags = []
- gsub!(/^.*$\n?/) do |line|
- line.chomp!
-
- open = opened_tags.join
- line.scan(%r!<(/)?span[^>]*>?!) do |close,|
- if close
- opened_tags.pop
- else
- opened_tags << $&
- end
- end
- close = '</span>' * opened_tags.size
-
- "<li>#{open}#{line}#{close}</li>"
- end
- wrap_in! LIST
- @wrapped_in = :div
-
- else
- raise ArgumentError, 'Unknown value %p for mode: expected one of %p' %
- [mode, [:table, :list, :inline]]
- end
-
- self
- end
-
- def line_count
- line_count = count("\n")
- position_of_last_newline = rindex(?\n)
- if position_of_last_newline
- after_last_newline = self[position_of_last_newline + 1 .. -1]
- ends_with_newline = after_last_newline[/\A(?:<\/span>)*\z/]
- line_count += 1 if not ends_with_newline
- end
- line_count
- end
-
- end
-
- end
-
-end
-end
+module CodeRay
+module Encoders
+
+ class HTML
+
+ module Output
+
+ def numerize *args
+ clone.numerize!(*args)
+ end
+
+=begin NUMERIZABLE_WRAPPINGS = {
+ :table => [:div, :page, nil],
+ :inline => :all,
+ :list => [:div, :page, nil]
+ }
+ NUMERIZABLE_WRAPPINGS.default = :all
+=end
+ def numerize! mode = :table, options = {}
+ return self unless mode
+
+ options = DEFAULT_OPTIONS.merge options
+
+ start = options[:line_number_start]
+ unless start.is_a? Integer
+ raise ArgumentError, "Invalid value %p for :line_number_start; Integer expected." % start
+ end
+
+ #allowed_wrappings = NUMERIZABLE_WRAPPINGS[mode]
+ #unless allowed_wrappings == :all or allowed_wrappings.include? options[:wrap]
+ # raise ArgumentError, "Can't numerize, :wrap must be in %p, but is %p" % [NUMERIZABLE_WRAPPINGS, options[:wrap]]
+ #end
+
+ bold_every = options[:bold_every]
+ bolding =
+ if bold_every == false
+ proc { |line| line.to_s }
+ elsif bold_every.is_a? Integer
+ raise ArgumentError, ":bolding can't be 0." if bold_every == 0
+ proc do |line|
+ if line % bold_every == 0
+ "<strong>#{line}</strong>" # every bold_every-th number in bold
+ else
+ line.to_s
+ end
+ end
+ else
+ raise ArgumentError, 'Invalid value %p for :bolding; false or Integer expected.' % bold_every
+ end
+
+ case mode
+ when :inline
+ max_width = (start + line_count).to_s.size
+ line = start
+ gsub!(/^/) do
+ line_number = bolding.call line
+ indent = ' ' * (max_width - line.to_s.size)
+ res = "<span class=\"no\">#{indent}#{line_number}</span> "
+ line += 1
+ res
+ end
+
+ when :table
+ # This is really ugly.
+ # Because even monospace fonts seem to have different heights when bold,
+ # I make the newline bold, both in the code and the line numbers.
+ # FIXME Still not working perfect for Mr. Internet Exploder
+ # FIXME Firefox struggles with very long codes (> 200 lines)
+ line_numbers = (start ... start + line_count).to_a.map(&bolding).join("\n")
+ line_numbers << "\n" # also for Mr. MS Internet Exploder :-/
+ line_numbers.gsub!(/\n/) { "<tt>\n</tt>" }
+
+ line_numbers_table_tpl = TABLE.apply('LINE_NUMBERS', line_numbers)
+ gsub!(/\n/) { "<tt>\n</tt>" }
+ wrap_in! line_numbers_table_tpl
+ @wrapped_in = :div
+
+ when :list
+ opened_tags = []
+ gsub!(/^.*$\n?/) do |line|
+ line.chomp!
+
+ open = opened_tags.join
+ line.scan(%r!<(/)?span[^>]*>?!) do |close,|
+ if close
+ opened_tags.pop
+ else
+ opened_tags << $&
+ end
+ end
+ close = '</span>' * opened_tags.size
+
+ "<li>#{open}#{line}#{close}</li>"
+ end
+ wrap_in! LIST
+ @wrapped_in = :div
+
+ else
+ raise ArgumentError, 'Unknown value %p for mode: expected one of %p' %
+ [mode, [:table, :list, :inline]]
+ end
+
+ self
+ end
+
+ def line_count
+ line_count = count("\n")
+ position_of_last_newline = rindex(?\n)
+ if position_of_last_newline
+ after_last_newline = self[position_of_last_newline + 1 .. -1]
+ ends_with_newline = after_last_newline[/\A(?:<\/span>)*\z/]
+ line_count += 1 if not ends_with_newline
+ end
+ line_count
+ end
+
+ end
+
+ end
+
+end
+end
diff --git a/lib/coderay/encoders/html/output.rb b/lib/coderay/encoders/html/output.rb
index 61258ee..e74e55e 100644
--- a/lib/coderay/encoders/html/output.rb
+++ b/lib/coderay/encoders/html/output.rb
@@ -1,195 +1,195 @@
-module CodeRay
-module Encoders
-
- class HTML
-
- # This module is included in the output String from thew HTML Encoder.
- #
- # It provides methods like wrap, div, page etc.
- #
- # Remember to use #clone instead of #dup to keep the modules the object was
- # extended with.
- #
- # TODO: more doc.
- module Output
-
- require 'coderay/encoders/html/numerization.rb'
-
- attr_accessor :css
-
- class << self
-
- # This makes Output look like a class.
- #
- # Example:
- #
- # a = Output.new '<span class="co">Code</span>'
- # a.wrap! :page
- def new string, css = CSS.new, element = nil
- output = string.clone.extend self
- output.wrapped_in = element
- output.css = css
- output
- end
-
- # Raises an exception if an object that doesn't respond to to_str is extended by Output,
- # to prevent users from misuse. Use Module#remove_method to disable.
- def extended o
- warn "The Output module is intended to extend instances of String, not #{o.class}." unless o.respond_to? :to_str
- end
-
- def make_stylesheet css, in_tag = false
- sheet = css.stylesheet
- sheet = <<-CSS if in_tag
-<style type="text/css">
-#{sheet}
-</style>
- CSS
- sheet
- end
-
- def page_template_for_css css
- sheet = make_stylesheet css
- PAGE.apply 'CSS', sheet
- end
-
- # Define a new wrapper. This is meta programming.
- def wrapper *wrappers
- wrappers.each do |wrapper|
- define_method wrapper do |*args|
- wrap wrapper, *args
- end
- define_method "#{wrapper}!".to_sym do |*args|
- wrap! wrapper, *args
- end
- end
- end
-
- end
-
- wrapper :div, :span, :page
-
- def wrapped_in? element
- wrapped_in == element
- end
-
- def wrapped_in
- @wrapped_in ||= nil
- end
- attr_writer :wrapped_in
-
- def wrap_in template
- clone.wrap_in! template
- end
-
- def wrap_in! template
- Template.wrap! self, template, 'CONTENT'
- self
- end
-
- def wrap! element, *args
- return self if not element or element == wrapped_in
- case element
- when :div
- raise "Can't wrap %p in %p" % [wrapped_in, element] unless wrapped_in? nil
- wrap_in! DIV
- when :span
- raise "Can't wrap %p in %p" % [wrapped_in, element] unless wrapped_in? nil
- wrap_in! SPAN
- when :page
- wrap! :div if wrapped_in? nil
- raise "Can't wrap %p in %p" % [wrapped_in, element] unless wrapped_in? :div
- wrap_in! Output.page_template_for_css(@css)
- when nil
- return self
- else
- raise "Unknown value %p for :wrap" % element
- end
- @wrapped_in = element
- self
- end
-
- def wrap *args
- clone.wrap!(*args)
- end
-
- def stylesheet in_tag = false
- Output.make_stylesheet @css, in_tag
- end
-
- class Template < String
-
- def self.wrap! str, template, target
- target = Regexp.new(Regexp.escape("<%#{target}%>"))
- if template =~ target
- str[0,0] = $`
- str << $'
- else
- raise "Template target <%%%p%%> not found" % target
- end
- end
-
- def apply target, replacement
- target = Regexp.new(Regexp.escape("<%#{target}%>"))
- if self =~ target
- Template.new($` + replacement + $')
- else
- raise "Template target <%%%p%%> not found" % target
- end
- end
-
- module Simple
- def ` str #` <-- for stupid editors
- Template.new str
- end
- end
- end
-
- extend Template::Simple
-
-#-- don't include the templates in docu
-
- SPAN = `<span class="CodeRay"><%CONTENT%></span>`
-
- DIV = <<-`DIV`
-<div class="CodeRay">
- <div class="code"><pre><%CONTENT%></pre></div>
-</div>
- DIV
-
- TABLE = <<-`TABLE`
-<table class="CodeRay"><tr>
- <td class="line_numbers" title="click to toggle" onclick="with (this.firstChild.style) { display = (display == '') ? 'none' : '' }"><pre><%LINE_NUMBERS%></pre></td>
- <td class="code"><pre ondblclick="with (this.style) { overflow = (overflow == 'auto' || overflow == '') ? 'visible' : 'auto' }"><%CONTENT%></pre></td>
-</tr></table>
- TABLE
- # title="double click to expand"
-
- LIST = <<-`LIST`
-<ol class="CodeRay"><%CONTENT%></ol>
- LIST
-
- PAGE = <<-`PAGE`
-<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
- "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
-<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="de">
-<head>
- <meta http-equiv="content-type" content="text/html; charset=utf-8" />
- <title>CodeRay HTML Encoder Example</title>
- <style type="text/css">
-<%CSS%>
- </style>
-</head>
-<body style="background-color: white;">
-
-<%CONTENT%>
-</body>
-</html>
- PAGE
-
- end
-
- end
-
-end
-end
+module CodeRay
+module Encoders
+
+ class HTML
+
+    # This module is included in the output String from the HTML Encoder.
+ #
+ # It provides methods like wrap, div, page etc.
+ #
+ # Remember to use #clone instead of #dup to keep the modules the object was
+ # extended with.
+ #
+ # TODO: more doc.
+ module Output
+
+ require 'coderay/encoders/html/numerization.rb'
+
+ attr_accessor :css
+
+ class << self
+
+ # This makes Output look like a class.
+ #
+ # Example:
+ #
+ # a = Output.new '<span class="co">Code</span>'
+ # a.wrap! :page
+ def new string, css = CSS.new, element = nil
+ output = string.clone.extend self
+ output.wrapped_in = element
+ output.css = css
+ output
+ end
+
+        # Emits a warning if an object that doesn't respond to to_str is extended by Output,
+ # to prevent users from misuse. Use Module#remove_method to disable.
+ def extended o
+ warn "The Output module is intended to extend instances of String, not #{o.class}." unless o.respond_to? :to_str
+ end
+
+ def make_stylesheet css, in_tag = false
+ sheet = css.stylesheet
+ sheet = <<-CSS if in_tag
+<style type="text/css">
+#{sheet}
+</style>
+ CSS
+ sheet
+ end
+
+ def page_template_for_css css
+ sheet = make_stylesheet css
+ PAGE.apply 'CSS', sheet
+ end
+
+ # Define a new wrapper. This is meta programming.
+ def wrapper *wrappers
+ wrappers.each do |wrapper|
+ define_method wrapper do |*args|
+ wrap wrapper, *args
+ end
+ define_method "#{wrapper}!".to_sym do |*args|
+ wrap! wrapper, *args
+ end
+ end
+ end
+
+ end
+
+ wrapper :div, :span, :page
+
+ def wrapped_in? element
+ wrapped_in == element
+ end
+
+ def wrapped_in
+ @wrapped_in ||= nil
+ end
+ attr_writer :wrapped_in
+
+ def wrap_in template
+ clone.wrap_in! template
+ end
+
+ def wrap_in! template
+ Template.wrap! self, template, 'CONTENT'
+ self
+ end
+
+ def wrap! element, *args
+ return self if not element or element == wrapped_in
+ case element
+ when :div
+ raise "Can't wrap %p in %p" % [wrapped_in, element] unless wrapped_in? nil
+ wrap_in! DIV
+ when :span
+ raise "Can't wrap %p in %p" % [wrapped_in, element] unless wrapped_in? nil
+ wrap_in! SPAN
+ when :page
+ wrap! :div if wrapped_in? nil
+ raise "Can't wrap %p in %p" % [wrapped_in, element] unless wrapped_in? :div
+ wrap_in! Output.page_template_for_css(@css)
+ when nil
+ return self
+ else
+ raise "Unknown value %p for :wrap" % element
+ end
+ @wrapped_in = element
+ self
+ end
+
+ def wrap *args
+ clone.wrap!(*args)
+ end
+
+ def stylesheet in_tag = false
+ Output.make_stylesheet @css, in_tag
+ end
+
+ class Template < String
+
+        def self.wrap! str, template, target  # Splices template around str (in place) at its <%target%> marker.
+          pattern = Regexp.new(Regexp.escape("<%#{target}%>"))
+          if template =~ pattern
+            str[0,0] = $`  # prepend the template text that precedes the marker
+            str << $'      # append the template text that follows the marker
+          else
+            raise "Template target <%%%p%%> not found" % target  # report the target name, not the Regexp
+          end
+        end
+
+        def apply target, replacement  # Returns a new Template with the <%target%> marker replaced by replacement.
+          pattern = Regexp.new(Regexp.escape("<%#{target}%>"))
+          if self =~ pattern
+            Template.new($` + replacement + $')
+          else
+            raise "Template target <%%%p%%> not found" % target  # report the target name, not the Regexp
+          end
+        end
+
+ module Simple
+ def ` str #` <-- for stupid editors
+ Template.new str
+ end
+ end
+ end
+
+ extend Template::Simple
+
+#-- don't include the templates in docu
+
+ SPAN = `<span class="CodeRay"><%CONTENT%></span>`
+
+ DIV = <<-`DIV`
+<div class="CodeRay">
+ <div class="code"><pre><%CONTENT%></pre></div>
+</div>
+ DIV
+
+ TABLE = <<-`TABLE`
+<table class="CodeRay"><tr>
+ <td class="line_numbers" title="click to toggle" onclick="with (this.firstChild.style) { display = (display == '') ? 'none' : '' }"><pre><%LINE_NUMBERS%></pre></td>
+ <td class="code"><pre ondblclick="with (this.style) { overflow = (overflow == 'auto' || overflow == '') ? 'visible' : 'auto' }"><%CONTENT%></pre></td>
+</tr></table>
+ TABLE
+ # title="double click to expand"
+
+ LIST = <<-`LIST`
+<ol class="CodeRay"><%CONTENT%></ol>
+ LIST
+
+ PAGE = <<-`PAGE`
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="de">
+<head>
+ <meta http-equiv="content-type" content="text/html; charset=utf-8" />
+ <title>CodeRay HTML Encoder Example</title>
+ <style type="text/css">
+<%CSS%>
+ </style>
+</head>
+<body style="background-color: white;">
+
+<%CONTENT%>
+</body>
+</html>
+ PAGE
+
+ end
+
+ end
+
+end
+end
diff --git a/lib/coderay/encoders/null.rb b/lib/coderay/encoders/null.rb
index 96d81fe..add3862 100644
--- a/lib/coderay/encoders/null.rb
+++ b/lib/coderay/encoders/null.rb
@@ -1,26 +1,26 @@
-module CodeRay
-module Encoders
-
- # = Null Encoder
- #
- # Does nothing and returns an empty string.
- class Null < Encoder
-
- include Streamable
- register_for :null
-
- # Defined for faster processing
- def to_proc
- proc {}
- end
-
- protected
-
- def token(*)
- # do nothing
- end
-
- end
-
-end
-end
+module CodeRay
+module Encoders
+
+ # = Null Encoder
+ #
+ # Does nothing and returns an empty string.
+ class Null < Encoder
+
+ include Streamable
+ register_for :null
+
+ # Defined for faster processing
+ def to_proc
+ proc {}
+ end
+
+ protected
+
+ def token(*)
+ # do nothing
+ end
+
+ end
+
+end
+end
diff --git a/lib/coderay/encoders/page.rb b/lib/coderay/encoders/page.rb
index 1ed7985..c08f094 100644
--- a/lib/coderay/encoders/page.rb
+++ b/lib/coderay/encoders/page.rb
@@ -1,21 +1,21 @@
-module CodeRay
-module Encoders
-
- load :html
-
- class Page < HTML
-
- FILE_EXTENSION = 'html'
-
- register_for :page
-
- DEFAULT_OPTIONS = HTML::DEFAULT_OPTIONS.merge({
- :css => :class,
- :wrap => :page,
- :line_numbers => :table
- })
-
- end
-
-end
-end
+module CodeRay
+module Encoders
+
+ load :html
+
+ class Page < HTML
+
+ FILE_EXTENSION = 'html'
+
+ register_for :page
+
+ DEFAULT_OPTIONS = HTML::DEFAULT_OPTIONS.merge({
+ :css => :class,
+ :wrap => :page,
+ :line_numbers => :table
+ })
+
+ end
+
+end
+end
diff --git a/lib/coderay/encoders/span.rb b/lib/coderay/encoders/span.rb
index e892cb2..988afec 100644
--- a/lib/coderay/encoders/span.rb
+++ b/lib/coderay/encoders/span.rb
@@ -1,20 +1,20 @@
-module CodeRay
-module Encoders
-
- load :html
-
- class Span < HTML
-
- FILE_EXTENSION = 'span.html'
-
- register_for :span
-
- DEFAULT_OPTIONS = HTML::DEFAULT_OPTIONS.merge({
- :css => :style,
- :wrap => :span,
- })
-
- end
-
-end
-end
+module CodeRay
+module Encoders
+
+ load :html
+
+ class Span < HTML
+
+ FILE_EXTENSION = 'span.html'
+
+ register_for :span
+
+ DEFAULT_OPTIONS = HTML::DEFAULT_OPTIONS.merge({
+ :css => :style,
+ :wrap => :span,
+ })
+
+ end
+
+end
+end
diff --git a/lib/coderay/encoders/statistic.rb b/lib/coderay/encoders/statistic.rb
index f80d5c8..e2a0460 100644
--- a/lib/coderay/encoders/statistic.rb
+++ b/lib/coderay/encoders/statistic.rb
@@ -1,81 +1,81 @@
-module CodeRay
-module Encoders
-
- # Makes a statistic for the given tokens.
- class Statistic < Encoder
-
- include Streamable
- register_for :stats, :statistic
-
- attr_reader :type_stats, :real_token_count
-
- protected
-
- TypeStats = Struct.new :count, :size
-
- def setup options
- @type_stats = Hash.new { |h, k| h[k] = TypeStats.new 0, 0 }
- @real_token_count = 0
- end
-
- def generate tokens, options
- @tokens = tokens
- super
- end
-
- def text_token text, kind
- @real_token_count += 1 unless kind == :space
- @type_stats[kind].count += 1
- @type_stats[kind].size += text.size
- @type_stats['TOTAL'].size += text.size
- end
-
- # TODO Hierarchy handling
- def block_token action, kind
- #@content_type = kind
- @type_stats['open/close'].count += 1
- end
-
- def token text, kind
- super
- @type_stats['TOTAL'].count += 1
- end
-
- STATS = <<-STATS
-
-Code Statistics
-
-Tokens %8d
- Non-Whitespace %8d
-Bytes Total %8d
-
-Token Types (%d):
- type count ratio size (average)
--------------------------------------------------------------
-%s
- STATS
-# space 12007 33.81 % 1.7
- TOKEN_TYPES_ROW = <<-TKR
- %-20s %8d %6.2f %% %5.1f
- TKR
-
- def finish options
- all = @type_stats['TOTAL']
- all_count, all_size = all.count, all.size
- @type_stats.each do |type, stat|
- stat.size /= stat.count.to_f
- end
- types_stats = @type_stats.sort_by { |k, v| [-v.count, k.to_s] }.map do |k, v|
- TOKEN_TYPES_ROW % [k, v.count, 100.0 * v.count / all_count, v.size]
- end.join
- STATS % [
- all_count, @real_token_count, all_size,
- @type_stats.delete_if { |k, v| k.is_a? String }.size,
- types_stats
- ]
- end
-
- end
-
-end
-end
+module CodeRay
+module Encoders
+
+ # Makes a statistic for the given tokens.
+ class Statistic < Encoder
+
+ include Streamable
+ register_for :stats, :statistic
+
+ attr_reader :type_stats, :real_token_count
+
+ protected
+
+ TypeStats = Struct.new :count, :size
+
+ def setup options
+ @type_stats = Hash.new { |h, k| h[k] = TypeStats.new 0, 0 }
+ @real_token_count = 0
+ end
+
+ def generate tokens, options
+ @tokens = tokens
+ super
+ end
+
+ def text_token text, kind
+ @real_token_count += 1 unless kind == :space
+ @type_stats[kind].count += 1
+ @type_stats[kind].size += text.size
+ @type_stats['TOTAL'].size += text.size
+ end
+
+ # TODO Hierarchy handling
+ def block_token action, kind
+ #@content_type = kind
+ @type_stats['open/close'].count += 1
+ end
+
+ def token text, kind
+ super
+ @type_stats['TOTAL'].count += 1
+ end
+
+ STATS = <<-STATS
+
+Code Statistics
+
+Tokens %8d
+ Non-Whitespace %8d
+Bytes Total %8d
+
+Token Types (%d):
+ type count ratio size (average)
+-------------------------------------------------------------
+%s
+ STATS
+# space 12007 33.81 % 1.7
+ TOKEN_TYPES_ROW = <<-TKR
+ %-20s %8d %6.2f %% %5.1f
+ TKR
+
+ def finish options
+ all = @type_stats['TOTAL']
+ all_count, all_size = all.count, all.size
+ @type_stats.each do |type, stat|
+ stat.size /= stat.count.to_f
+ end
+ types_stats = @type_stats.sort_by { |k, v| [-v.count, k.to_s] }.map do |k, v|
+ TOKEN_TYPES_ROW % [k, v.count, 100.0 * v.count / all_count, v.size]
+ end.join
+ STATS % [
+ all_count, @real_token_count, all_size,
+ @type_stats.delete_if { |k, v| k.is_a? String }.size,
+ types_stats
+ ]
+ end
+
+ end
+
+end
+end
diff --git a/lib/coderay/encoders/text.rb b/lib/coderay/encoders/text.rb
index 31661ef..17256c6 100644
--- a/lib/coderay/encoders/text.rb
+++ b/lib/coderay/encoders/text.rb
@@ -1,33 +1,33 @@
-module CodeRay
-module Encoders
-
- class Text < Encoder
-
- include Streamable
- register_for :text
-
- FILE_EXTENSION = 'txt'
-
- DEFAULT_OPTIONS = {
- :separator => ''
- }
-
- protected
- def setup options
- super
- @sep = options[:separator]
- end
-
- def token text, kind
- return unless text.respond_to? :to_str
- @out << text + @sep
- end
-
- def finish options
- @out.chomp @sep
- end
-
- end
-
-end
-end
+module CodeRay
+module Encoders
+
+ class Text < Encoder
+
+ include Streamable
+ register_for :text
+
+ FILE_EXTENSION = 'txt'
+
+ DEFAULT_OPTIONS = {
+ :separator => ''
+ }
+
+ protected
+ def setup options
+ super
+ @sep = options[:separator]
+ end
+
+ def token text, kind
+ return unless text.respond_to? :to_str
+ @out << text + @sep
+ end
+
+    def finish options  # Returns the output without the separator that follows the last token.
+      @sep.to_s.empty? ? @out : @out.chomp(@sep)  # chomp('') is paragraph mode: it would strip every trailing newline
+    end
+
+ end
+
+end
+end
diff --git a/lib/coderay/encoders/tokens.rb b/lib/coderay/encoders/tokens.rb
index 743cc0e..2428589 100644
--- a/lib/coderay/encoders/tokens.rb
+++ b/lib/coderay/encoders/tokens.rb
@@ -1,44 +1,44 @@
-module CodeRay
-module Encoders
-
- # The Tokens encoder converts the tokens to a simple
- # readable format. It doesn't use colors and is mainly
- # intended for console output.
- #
- # The tokens are converted with Tokens.write_token.
- #
- # The format is:
- #
- # <token-kind> \t <escaped token-text> \n
- #
- # Example:
- #
- # require 'coderay'
- # puts CodeRay.scan("puts 3 + 4", :ruby).tokens
- #
- # prints:
- #
- # ident puts
- # space
- # integer 3
- # space
- # operator +
- # space
- # integer 4
- #
- class Tokens < Encoder
-
- include Streamable
- register_for :tokens
-
- FILE_EXTENSION = 'tok'
-
- protected
- def token *args
- @out << CodeRay::Tokens.write_token(*args)
- end
-
- end
-
-end
-end
+module CodeRay
+module Encoders
+
+ # The Tokens encoder converts the tokens to a simple
+ # readable format. It doesn't use colors and is mainly
+ # intended for console output.
+ #
+ # The tokens are converted with Tokens.write_token.
+ #
+ # The format is:
+ #
+ # <token-kind> \t <escaped token-text> \n
+ #
+ # Example:
+ #
+ # require 'coderay'
+ # puts CodeRay.scan("puts 3 + 4", :ruby).tokens
+ #
+ # prints:
+ #
+ # ident puts
+ # space
+ # integer 3
+ # space
+ # operator +
+ # space
+ # integer 4
+ #
+ class Tokens < Encoder
+
+ include Streamable
+ register_for :tokens
+
+ FILE_EXTENSION = 'tok'
+
+ protected
+ def token *args
+ @out << CodeRay::Tokens.write_token(*args)
+ end
+
+ end
+
+end
+end
diff --git a/lib/coderay/encoders/xml.rb b/lib/coderay/encoders/xml.rb
index 21ef0cf..09e4549 100644
--- a/lib/coderay/encoders/xml.rb
+++ b/lib/coderay/encoders/xml.rb
@@ -1,71 +1,71 @@
-module CodeRay
-module Encoders
-
- # = XML Encoder
- #
- # Uses REXML. Very slow.
- class XML < Encoder
-
- include Streamable
- register_for :xml
-
- FILE_EXTENSION = 'xml'
-
- require 'rexml/document'
-
- DEFAULT_OPTIONS = {
- :tab_width => 8,
- :pretty => -1,
- :transitive => false,
- }
-
- protected
-
- def setup options
- @out = ''
- @doc = REXML::Document.new
- @doc << REXML::XMLDecl.new
- @tab_width = options[:tab_width]
- @root = @node = @doc.add_element('coderay-tokens')
- end
-
- def finish options
- @doc.write @out, options[:pretty], options[:transitive], true
- @out
- end
-
- def text_token text, kind
- if kind == :space
- token = @node
- else
- token = @node.add_element kind.to_s
- end
- text.scan(/(\x20+)|(\t+)|(\n)|[^\x20\t\n]+/) do |space, tab, nl|
- case
- when space
- token << REXML::Text.new(space, true)
- when tab
- token << REXML::Text.new(tab, true)
- when nl
- token << REXML::Text.new(nl, true)
- else
- token << REXML::Text.new($&)
- end
- end
- end
-
- def open_token kind
- @node = @node.add_element kind.to_s
- end
-
- def close_token kind
- if @node == @root
- raise 'no token to close!'
- end
- @node = @node.parent
- end
-
- end
-
-end
-end
+module CodeRay
+module Encoders
+
+ # = XML Encoder
+ #
+ # Uses REXML. Very slow.
+ class XML < Encoder
+
+ include Streamable
+ register_for :xml
+
+ FILE_EXTENSION = 'xml'
+
+ require 'rexml/document'
+
+ DEFAULT_OPTIONS = {
+ :tab_width => 8,
+ :pretty => -1,
+ :transitive => false,
+ }
+
+ protected
+
+ def setup options
+ @out = ''
+ @doc = REXML::Document.new
+ @doc << REXML::XMLDecl.new
+ @tab_width = options[:tab_width]
+ @root = @node = @doc.add_element('coderay-tokens')
+ end
+
+ def finish options
+ @doc.write @out, options[:pretty], options[:transitive], true
+ @out
+ end
+
+ def text_token text, kind
+ if kind == :space
+ token = @node
+ else
+ token = @node.add_element kind.to_s
+ end
+ text.scan(/(\x20+)|(\t+)|(\n)|[^\x20\t\n]+/) do |space, tab, nl|
+ case
+ when space
+ token << REXML::Text.new(space, true)
+ when tab
+ token << REXML::Text.new(tab, true)
+ when nl
+ token << REXML::Text.new(nl, true)
+ else
+ token << REXML::Text.new($&)
+ end
+ end
+ end
+
+ def open_token kind
+ @node = @node.add_element kind.to_s
+ end
+
+ def close_token kind
+ if @node == @root
+ raise 'no token to close!'
+ end
+ @node = @node.parent
+ end
+
+ end
+
+end
+end
diff --git a/lib/coderay/encoders/yaml.rb b/lib/coderay/encoders/yaml.rb
index 47f64a4..5564e58 100644
--- a/lib/coderay/encoders/yaml.rb
+++ b/lib/coderay/encoders/yaml.rb
@@ -1,22 +1,22 @@
-module CodeRay
-module Encoders
-
- # = YAML Encoder
- #
- # Slow.
- class YAML < Encoder
-
- register_for :yaml
-
- FILE_EXTENSION = 'yaml'
-
- protected
- def compile tokens, options
- require 'yaml'
- @out = tokens.to_a.to_yaml
- end
-
- end
-
-end
-end
+module CodeRay
+module Encoders
+
+ # = YAML Encoder
+ #
+ # Slow.
+ class YAML < Encoder
+
+ register_for :yaml
+
+ FILE_EXTENSION = 'yaml'
+
+ protected
+ def compile tokens, options
+ require 'yaml'
+ @out = tokens.to_a.to_yaml
+ end
+
+ end
+
+end
+end
diff --git a/lib/coderay/scanners/plaintext.rb b/lib/coderay/scanners/plaintext.rb
index 9007646..432745f 100644
--- a/lib/coderay/scanners/plaintext.rb
+++ b/lib/coderay/scanners/plaintext.rb
@@ -1,15 +1,15 @@
-module CodeRay
-module Scanners
-
- class Plaintext < Scanner
-
- register_for :plaintext, :plain
-
- def scan_tokens tokens, options
- tokens << [scan_until(/\z/), :plain]
- end
-
- end
-
-end
-end
+module CodeRay
+module Scanners
+
+ class Plaintext < Scanner
+
+ register_for :plaintext, :plain
+
+ def scan_tokens tokens, options
+ tokens << [scan_until(/\z/), :plain]
+ end
+
+ end
+
+end
+end
diff --git a/lib/coderay/scanners/ruby/patterns.rb b/lib/coderay/scanners/ruby/patterns.rb
index b1e0d1b..c601011 100644
--- a/lib/coderay/scanners/ruby/patterns.rb
+++ b/lib/coderay/scanners/ruby/patterns.rb
@@ -1,216 +1,216 @@
-module CodeRay
-module Scanners
-
- module Ruby::Patterns # :nodoc:
-
- RESERVED_WORDS = %w[
- and def end in or unless begin
- defined? ensure module redo super until
- BEGIN break do next rescue then
- when END case else for retry
- while alias class elsif if not return
- undef yield
- ]
-
- DEF_KEYWORDS = %w[ def ]
- UNDEF_KEYWORDS = %w[ undef ]
- MODULE_KEYWORDS = %w[class module]
- DEF_NEW_STATE = WordList.new(:initial).
- add(DEF_KEYWORDS, :def_expected).
- add(UNDEF_KEYWORDS, :undef_expected).
- add(MODULE_KEYWORDS, :module_expected)
-
- IDENTS_ALLOWING_REGEXP = %w[
- and or not while until unless if then elsif when sub sub! gsub gsub! scan slice slice! split
- ]
- REGEXP_ALLOWED = WordList.new(false).
- add(IDENTS_ALLOWING_REGEXP, :set)
-
- PREDEFINED_CONSTANTS = %w[
- nil true false self
- DATA ARGV ARGF __FILE__ __LINE__
- ]
-
- IDENT_KIND = WordList.new(:ident).
- add(RESERVED_WORDS, :reserved).
- add(PREDEFINED_CONSTANTS, :pre_constant)
-
- IDENT = /[a-z_][\w_]*/i
-
- METHOD_NAME = / #{IDENT} [?!]? /ox
- METHOD_NAME_OPERATOR = /
- \*\*? # multiplication and power
- | [-+]@? # plus, minus
- | [\/%&|^`~] # division, modulo or format strings, &and, |or, ^xor, `system`, tilde
- | \[\]=? # array getter and setter
- | << | >> # append or shift left, shift right
- | <=?>? | >=? # comparison, rocket operator
- | ===? # simple equality and case equality
- /ox
- METHOD_NAME_EX = / #{IDENT} (?:[?!]|=(?!>))? | #{METHOD_NAME_OPERATOR} /ox
- INSTANCE_VARIABLE = / @ #{IDENT} /ox
- CLASS_VARIABLE = / @@ #{IDENT} /ox
- OBJECT_VARIABLE = / @@? #{IDENT} /ox
- GLOBAL_VARIABLE = / \$ (?: #{IDENT} | [1-9]\d* | 0\w* | [~&+`'=\/,;_.<>!@$?*":\\] | -[a-zA-Z_0-9] ) /ox
- PREFIX_VARIABLE = / #{GLOBAL_VARIABLE} |#{OBJECT_VARIABLE} /ox
- VARIABLE = / @?@? #{IDENT} | #{GLOBAL_VARIABLE} /ox
-
- QUOTE_TO_TYPE = {
- '`' => :shell,
- '/'=> :regexp,
- }
- QUOTE_TO_TYPE.default = :string
-
- REGEXP_MODIFIERS = /[mixounse]*/
- REGEXP_SYMBOLS = /[|?*+?(){}\[\].^$]/
-
- DECIMAL = /\d+(?:_\d+)*/
- OCTAL = /0_?[0-7]+(?:_[0-7]+)*/
- HEXADECIMAL = /0x[0-9A-Fa-f]+(?:_[0-9A-Fa-f]+)*/
- BINARY = /0b[01]+(?:_[01]+)*/
-
- EXPONENT = / [eE] [+-]? #{DECIMAL} /ox
- FLOAT_SUFFIX = / #{EXPONENT} | \. #{DECIMAL} #{EXPONENT}? /ox
- FLOAT_OR_INT = / #{DECIMAL} (?: #{FLOAT_SUFFIX} () )? /ox
- NUMERIC = / [-+]? (?: (?=0) (?: #{OCTAL} | #{HEXADECIMAL} | #{BINARY} ) | #{FLOAT_OR_INT} ) /ox
-
- SYMBOL = /
- :
- (?:
- #{METHOD_NAME_EX}
- | #{PREFIX_VARIABLE}
- | ['"]
- )
- /ox
-
- # TODO investigste \M, \c and \C escape sequences
- # (?: M-\\C-|C-\\M-|M-\\c|c\\M-|c|C-|M-)? (?: \\ (?: [0-7]{3} | x[0-9A-Fa-f]{2} | . ) )
- # assert_equal(225, ?\M-a)
- # assert_equal(129, ?\M-\C-a)
- ESCAPE = /
- [abefnrstv]
- | M-\\C-|C-\\M-|M-\\c|c\\M-|c|C-|M-
- | [0-7]{1,3}
- | x[0-9A-Fa-f]{1,2}
- | .
- /mx
-
- CHARACTER = /
- \?
- (?:
- [^\s\\]
- | \\ #{ESCAPE}
- )
- /mx
-
- # NOTE: This is not completely correct, but
- # nobody needs heredoc delimiters ending with \n.
- HEREDOC_OPEN = /
- << (-)? # $1 = float
- (?:
- ( [A-Za-z_0-9]+ ) # $2 = delim
- |
- ( ["'`] ) # $3 = quote, type
- ( [^\n]*? ) \3 # $4 = delim
- )
- /mx
-
- RUBYDOC = /
- =begin (?!\S)
- .*?
- (?: \Z | ^=end (?!\S) [^\n]* )
- /mx
-
- DATA = /
- __END__$
- .*?
- (?: \Z | (?=^\#CODE) )
- /mx
-
- RUBYDOC_OR_DATA = / #{RUBYDOC} | #{DATA} /xo
-
- RDOC_DATA_START = / ^=begin (?!\S) | ^__END__$ /x
-
- # FIXME: \s and = are only a workaround, they are still allowed
- # as delimiters.
- FANCY_START_SAVE = / % ( [qQwWxsr] | (?![a-zA-Z0-9\s=]) ) ([^a-zA-Z0-9]) /mx
- FANCY_START_CORRECT = / % ( [qQwWxsr] | (?![a-zA-Z0-9]) ) ([^a-zA-Z0-9]) /mx
-
- FancyStringType = {
- 'q' => [:string, false],
- 'Q' => [:string, true],
- 'r' => [:regexp, true],
- 's' => [:symbol, false],
- 'x' => [:shell, true]
- }
- FancyStringType['w'] = FancyStringType['q']
- FancyStringType['W'] = FancyStringType[''] = FancyStringType['Q']
-
- class StringState < Struct.new :type, :interpreted, :delim, :heredoc,
- :paren, :paren_depth, :pattern, :next_state
-
- CLOSING_PAREN = Hash[ *%w[
- ( )
- [ ]
- < >
- { }
- ] ]
-
- CLOSING_PAREN.values.each { |o| o.freeze } # debug, if I try to change it with <<
- OPENING_PAREN = CLOSING_PAREN.invert
-
- STRING_PATTERN = Hash.new { |h, k|
- delim, interpreted = *k
- delim_pattern = Regexp.escape(delim.dup)
- if closing_paren = CLOSING_PAREN[delim]
- delim_pattern << Regexp.escape(closing_paren)
- end
-
-
- special_escapes =
- case interpreted
- when :regexp_symbols
- '| ' + REGEXP_SYMBOLS.source
- when :words
- '| \s'
- end
-
- h[k] =
- if interpreted and not delim == '#'
- / (?= [#{delim_pattern}\\] | \# [{$@] #{special_escapes} ) /mx
- else
- / (?= [#{delim_pattern}\\] #{special_escapes} ) /mx
- end
- }
-
- HEREDOC_PATTERN = Hash.new { |h, k|
- delim, interpreted, indented = *k
- delim_pattern = Regexp.escape(delim.dup)
- delim_pattern = / \n #{ '(?>[\ \t]*)' if indented } #{ Regexp.new delim_pattern } $ /x
- h[k] =
- if interpreted
- / (?= #{delim_pattern}() | \\ | \# [{$@] ) /mx # $1 set == end of heredoc
- else
- / (?= #{delim_pattern}() | \\ ) /mx
- end
- }
-
- def initialize kind, interpreted, delim, heredoc = false
- if heredoc
- pattern = HEREDOC_PATTERN[ [delim, interpreted, heredoc == :indented] ]
- delim = nil
- else
- pattern = STRING_PATTERN[ [delim, interpreted] ]
- if paren = CLOSING_PAREN[delim]
- delim, paren = paren, delim
- paren_depth = 1
- end
- end
- super kind, interpreted, delim, heredoc, paren, paren_depth, pattern, :initial
- end
- end unless defined? StringState
-
- end
-
-end
-end
+module CodeRay
+module Scanners
+
+ module Ruby::Patterns # :nodoc:
+
+ RESERVED_WORDS = %w[
+ and def end in or unless begin
+ defined? ensure module redo super until
+ BEGIN break do next rescue then
+ when END case else for retry
+ while alias class elsif if not return
+ undef yield
+ ]
+
+ DEF_KEYWORDS = %w[ def ]
+ UNDEF_KEYWORDS = %w[ undef ]
+ MODULE_KEYWORDS = %w[class module]
+ DEF_NEW_STATE = WordList.new(:initial).
+ add(DEF_KEYWORDS, :def_expected).
+ add(UNDEF_KEYWORDS, :undef_expected).
+ add(MODULE_KEYWORDS, :module_expected)
+
+ IDENTS_ALLOWING_REGEXP = %w[
+ and or not while until unless if then elsif when sub sub! gsub gsub! scan slice slice! split
+ ]
+ REGEXP_ALLOWED = WordList.new(false).
+ add(IDENTS_ALLOWING_REGEXP, :set)
+
+ PREDEFINED_CONSTANTS = %w[
+ nil true false self
+ DATA ARGV ARGF __FILE__ __LINE__
+ ]
+
+ IDENT_KIND = WordList.new(:ident).
+ add(RESERVED_WORDS, :reserved).
+ add(PREDEFINED_CONSTANTS, :pre_constant)
+
+ IDENT = /[a-z_][\w_]*/i
+
+ METHOD_NAME = / #{IDENT} [?!]? /ox
+ METHOD_NAME_OPERATOR = /
+ \*\*? # multiplication and power
+ | [-+]@? # plus, minus
+ | [\/%&|^`~] # division, modulo or format strings, &and, |or, ^xor, `system`, tilde
+ | \[\]=? # array getter and setter
+ | << | >> # append or shift left, shift right
+ | <=?>? | >=? # comparison, rocket operator
+ | ===? # simple equality and case equality
+ /ox
+ METHOD_NAME_EX = / #{IDENT} (?:[?!]|=(?!>))? | #{METHOD_NAME_OPERATOR} /ox
+ INSTANCE_VARIABLE = / @ #{IDENT} /ox
+ CLASS_VARIABLE = / @@ #{IDENT} /ox
+ OBJECT_VARIABLE = / @@? #{IDENT} /ox
+ GLOBAL_VARIABLE = / \$ (?: #{IDENT} | [1-9]\d* | 0\w* | [~&+`'=\/,;_.<>!@$?*":\\] | -[a-zA-Z_0-9] ) /ox
+ PREFIX_VARIABLE = / #{GLOBAL_VARIABLE} |#{OBJECT_VARIABLE} /ox
+ VARIABLE = / @?@? #{IDENT} | #{GLOBAL_VARIABLE} /ox
+
+ QUOTE_TO_TYPE = {
+ '`' => :shell,
+ '/'=> :regexp,
+ }
+ QUOTE_TO_TYPE.default = :string
+
+ REGEXP_MODIFIERS = /[mixounse]*/
+ REGEXP_SYMBOLS = /[|?*+?(){}\[\].^$]/
+
+ DECIMAL = /\d+(?:_\d+)*/
+ OCTAL = /0_?[0-7]+(?:_[0-7]+)*/
+ HEXADECIMAL = /0x[0-9A-Fa-f]+(?:_[0-9A-Fa-f]+)*/
+ BINARY = /0b[01]+(?:_[01]+)*/
+
+ EXPONENT = / [eE] [+-]? #{DECIMAL} /ox
+ FLOAT_SUFFIX = / #{EXPONENT} | \. #{DECIMAL} #{EXPONENT}? /ox
+ FLOAT_OR_INT = / #{DECIMAL} (?: #{FLOAT_SUFFIX} () )? /ox
+ NUMERIC = / [-+]? (?: (?=0) (?: #{OCTAL} | #{HEXADECIMAL} | #{BINARY} ) | #{FLOAT_OR_INT} ) /ox
+
+ SYMBOL = /
+ :
+ (?:
+ #{METHOD_NAME_EX}
+ | #{PREFIX_VARIABLE}
+ | ['"]
+ )
+ /ox
+
+    # TODO investigate \M, \c and \C escape sequences
+ # (?: M-\\C-|C-\\M-|M-\\c|c\\M-|c|C-|M-)? (?: \\ (?: [0-7]{3} | x[0-9A-Fa-f]{2} | . ) )
+ # assert_equal(225, ?\M-a)
+ # assert_equal(129, ?\M-\C-a)
+ ESCAPE = /
+ [abefnrstv]
+ | M-\\C-|C-\\M-|M-\\c|c\\M-|c|C-|M-
+ | [0-7]{1,3}
+ | x[0-9A-Fa-f]{1,2}
+ | .
+ /mx
+
+ CHARACTER = /
+ \?
+ (?:
+ [^\s\\]
+ | \\ #{ESCAPE}
+ )
+ /mx
+
+ # NOTE: This is not completely correct, but
+ # nobody needs heredoc delimiters ending with \n.
+ HEREDOC_OPEN = /
+ << (-)? # $1 = float
+ (?:
+ ( [A-Za-z_0-9]+ ) # $2 = delim
+ |
+ ( ["'`] ) # $3 = quote, type
+ ( [^\n]*? ) \3 # $4 = delim
+ )
+ /mx
+
+ RUBYDOC = /
+ =begin (?!\S)
+ .*?
+ (?: \Z | ^=end (?!\S) [^\n]* )
+ /mx
+
+ DATA = /
+ __END__$
+ .*?
+ (?: \Z | (?=^\#CODE) )
+ /mx
+
+ RUBYDOC_OR_DATA = / #{RUBYDOC} | #{DATA} /xo
+
+ RDOC_DATA_START = / ^=begin (?!\S) | ^__END__$ /x
+
+ # FIXME: \s and = are only a workaround, they are still allowed
+ # as delimiters.
+ FANCY_START_SAVE = / % ( [qQwWxsr] | (?![a-zA-Z0-9\s=]) ) ([^a-zA-Z0-9]) /mx
+ FANCY_START_CORRECT = / % ( [qQwWxsr] | (?![a-zA-Z0-9]) ) ([^a-zA-Z0-9]) /mx
+
+ FancyStringType = {
+ 'q' => [:string, false],
+ 'Q' => [:string, true],
+ 'r' => [:regexp, true],
+ 's' => [:symbol, false],
+ 'x' => [:shell, true]
+ }
+ FancyStringType['w'] = FancyStringType['q']
+ FancyStringType['W'] = FancyStringType[''] = FancyStringType['Q']
+
+ class StringState < Struct.new :type, :interpreted, :delim, :heredoc,
+ :paren, :paren_depth, :pattern, :next_state
+
+ CLOSING_PAREN = Hash[ *%w[
+ ( )
+ [ ]
+ < >
+ { }
+ ] ]
+
+ CLOSING_PAREN.values.each { |o| o.freeze } # debug, if I try to change it with <<
+ OPENING_PAREN = CLOSING_PAREN.invert
+
+ STRING_PATTERN = Hash.new { |h, k|
+ delim, interpreted = *k
+ delim_pattern = Regexp.escape(delim.dup)
+ if closing_paren = CLOSING_PAREN[delim]
+ delim_pattern << Regexp.escape(closing_paren)
+ end
+
+
+ special_escapes =
+ case interpreted
+ when :regexp_symbols
+ '| ' + REGEXP_SYMBOLS.source
+ when :words
+ '| \s'
+ end
+
+ h[k] =
+ if interpreted and not delim == '#'
+ / (?= [#{delim_pattern}\\] | \# [{$@] #{special_escapes} ) /mx
+ else
+ / (?= [#{delim_pattern}\\] #{special_escapes} ) /mx
+ end
+ }
+
+ HEREDOC_PATTERN = Hash.new { |h, k|
+ delim, interpreted, indented = *k
+ delim_pattern = Regexp.escape(delim.dup)
+ delim_pattern = / \n #{ '(?>[\ \t]*)' if indented } #{ Regexp.new delim_pattern } $ /x
+ h[k] =
+ if interpreted
+ / (?= #{delim_pattern}() | \\ | \# [{$@] ) /mx # $1 set == end of heredoc
+ else
+ / (?= #{delim_pattern}() | \\ ) /mx
+ end
+ }
+
+ def initialize kind, interpreted, delim, heredoc = false
+ if heredoc
+ pattern = HEREDOC_PATTERN[ [delim, interpreted, heredoc == :indented] ]
+ delim = nil
+ else
+ pattern = STRING_PATTERN[ [delim, interpreted] ]
+ if paren = CLOSING_PAREN[delim]
+ delim, paren = paren, delim
+ paren_depth = 1
+ end
+ end
+ super kind, interpreted, delim, heredoc, paren, paren_depth, pattern, :initial
+ end
+ end unless defined? StringState
+
+ end
+
+end
+end
diff --git a/lib/coderay/scanners/xml.rb b/lib/coderay/scanners/xml.rb
index 5ce8ce9..ff923fb 100644
--- a/lib/coderay/scanners/xml.rb
+++ b/lib/coderay/scanners/xml.rb
@@ -1,18 +1,18 @@
-module CodeRay
-module Scanners
-
- load :html
-
- # XML Scanner
- #
- # $Id$
- #
- # Currently this is the same scanner as Scanners::HTML.
- class XML < HTML
-
- register_for :xml
-
- end
-
-end
-end
+module CodeRay
+module Scanners
+
+ load :html
+
+ # XML Scanner
+ #
+ # $Id$
+ #
+ # Currently this is the same scanner as Scanners::HTML.
+ class XML < HTML
+
+ register_for :xml
+
+ end
+
+end
+end
diff --git a/lib/coderay/style.rb b/lib/coderay/style.rb
index 057f8d4..c2977c5 100644
--- a/lib/coderay/style.rb
+++ b/lib/coderay/style.rb
@@ -1,20 +1,20 @@
-module CodeRay
-
- # This module holds the Style class and its subclasses.
- #
- # See Plugin.
- module Styles
- extend PluginHost
- plugin_path File.dirname(__FILE__), 'styles'
-
- class Style
- extend Plugin
- plugin_host Styles
-
- DEFAULT_OPTIONS = { }
-
- end
-
- end
-
-end
+module CodeRay
+
+ # This module holds the Style class and its subclasses.
+ #
+ # See Plugin.
+ module Styles
+ extend PluginHost
+ plugin_path File.dirname(__FILE__), 'styles'
+
+ class Style
+ extend Plugin
+ plugin_host Styles
+
+ DEFAULT_OPTIONS = { }
+
+ end
+
+ end
+
+end
diff --git a/lib/coderay/tokens.rb b/lib/coderay/tokens.rb
index 8b8c692..c8c62e0 100644
--- a/lib/coderay/tokens.rb
+++ b/lib/coderay/tokens.rb
@@ -1,322 +1,322 @@
-module CodeRay
-
- # = Tokens
- #
- # The Tokens class represents a list of tokens returnd from
- # a Scanner.
- #
- # A token is not a special object, just a two-element Array
- # consisting of
- # * the _token_ _kind_ (a Symbol representing the type of the token)
- # * the _token_ _text_ (the original source of the token in a String)
- #
- # A token looks like this:
- #
- # [:comment, '# It looks like this']
- # [:float, '3.1415926']
- # [:error, 'äöü']
- #
- # Some scanners also yield some kind of sub-tokens, represented by special
- # token texts, namely :open and :close .
- #
- # The Ruby scanner, for example, splits "a string" into:
- #
- # [
- # [:open, :string],
- # [:delimiter, '"'],
- # [:content, 'a string'],
- # [:delimiter, '"'],
- # [:close, :string]
- # ]
- #
- # Tokens is also the interface between Scanners and Encoders:
- # The input is split and saved into a Tokens object. The Encoder
- # then builds the output from this object.
- #
- # Thus, the syntax below becomes clear:
- #
- # CodeRay.scan('price = 2.59', :ruby).html
- # # the Tokens object is here -------^
- #
- # See how small it is? ;)
- #
- # Tokens gives you the power to handle pre-scanned code very easily:
- # You can convert it to a webpage, a YAML file, or dump it into a gzip'ed string
- # that you put in your DB.
- #
- # Tokens' subclass TokenStream allows streaming to save memory.
- class Tokens < Array
-
- class << self
-
- # Convert the token to a string.
- #
- # This format is used by Encoders.Tokens.
- # It can be reverted using read_token.
- def write_token text, type
- if text.is_a? String
- "#{type}\t#{escape(text)}\n"
- else
- ":#{text}\t#{type}\t\n"
- end
- end
-
- # Read a token from the string.
- #
- # Inversion of write_token.
- #
- # TODO Test this!
- def read_token token
- type, text = token.split("\t", 2)
- if type[0] == ?:
- [text.to_sym, type[1..-1].to_sym]
- else
- [type.to_sym, unescape(text)]
- end
- end
-
- # Escapes a string for use in write_token.
- def escape text
- text.gsub(/[\n\\]/, '\\\\\&')
- end
-
- # Unescapes a string created by escape.
- def unescape text
- text.gsub(/\\[\n\\]/) { |m| m[1,1] }
- end
-
- end
-
- # Whether the object is a TokenStream.
- #
- # Returns false.
- def stream?
- false
- end
-
- # Iterates over all tokens.
- #
- # If a filter is given, only tokens of that kind are yielded.
- def each kind_filter = nil, &block
- unless kind_filter
- super(&block)
- else
- super() do |text, kind|
- next unless kind == kind_filter
- yield text, kind
- end
- end
- end
-
- # Iterates over all text tokens.
- # Range tokens like [:open, :string] are left out.
- #
- # Example:
- # tokens.each_text_token { |text, kind| text.replace html_escape(text) }
- def each_text_token
- each do |text, kind|
- next unless text.respond_to? :to_str
- yield text, kind
- end
- end
-
- # Encode the tokens using encoder.
- #
- # encoder can be
- # * a symbol like :html oder :statistic
- # * an Encoder class
- # * an Encoder object
- #
- # options are passed to the encoder.
- def encode encoder, options = {}
- unless encoder.is_a? Encoders::Encoder
- unless encoder.is_a? Class
- encoder_class = Encoders[encoder]
- end
- encoder = encoder_class.new options
- end
- encoder.encode_tokens self, options
- end
-
-
- # Turn into a string using Encoders::Text.
- #
- # +options+ are passed to the encoder if given.
- def to_s options = {}
- encode :text, options
- end
-
-
- # Redirects unknown methods to encoder calls.
- #
- # For example, if you call +tokens.html+, the HTML encoder
- # is used to highlight the tokens.
- def method_missing meth, options = {}
- Encoders[meth].new(options).encode_tokens self
- end
-
- # Returns the tokens compressed by joining consecutive
- # tokens of the same kind.
- #
- # This can not be undone, but should yield the same output
- # in most Encoders. It basically makes the output smaller.
- #
- # Combined with dump, it saves space for the cost of time.
- #
- # If the scanner is written carefully, this is not required -
- # for example, consecutive //-comment lines could already be
- # joined in one comment token by the Scanner.
- def optimize
- print ' Tokens#optimize: before: %d - ' % size if $DEBUG
- last_kind = last_text = nil
- new = self.class.new
- each do |text, kind|
- if text.is_a? String
- if kind == last_kind
- last_text << text
- else
- new << [last_text, last_kind] if last_kind
- last_text = text
- last_kind = kind
- end
- else
- new << [last_text, last_kind] if last_kind
- last_kind = last_text = nil
- new << [text, kind]
- end
- end
- new << [last_text, last_kind] if last_kind
- print 'after: %d (%d saved = %2.0f%%)' %
- [new.size, size - new.size, 1.0 - (new.size.to_f / size)] if $DEBUG
- new
- end
-
- # Compact the object itself; see optimize.
- def optimize!
- replace optimize
- end
-
- # Dumps the object into a String that can be saved
- # in files or databases.
- #
- # The dump is created with Marshal.dump;
- # In addition, it is gzipped using GZip.gzip.
- #
- # The returned String object includes Undumping
- # so it has an #undump method. See Tokens.load.
- #
- # You can configure the level of compression,
- # but the default value 7 should be what you want
- # in most cases as it is a good comprimise between
- # speed and compression rate.
- #
- # See GZip module.
- def dump gzip_level = 7
- require 'coderay/helpers/gzip_simple'
- dump = Marshal.dump self
- dump = dump.gzip gzip_level
- dump.extend Undumping
- end
-
- # The total size of the tokens.
- # Should be equal to the input size before
- # scanning.
- def text_size
- map { |t, k| t }.join.size
- end
-
- # Include this module to give an object an #undump
- # method.
- #
- # The string returned by Tokens.dump includes Undumping.
- module Undumping
- # Calls Tokens.load with itself.
- def undump
- Tokens.load self
- end
- end
-
- # Undump the object using Marshal.load, then
- # unzip it using GZip.gunzip.
- #
- # The result is commonly a Tokens object, but
- # this is not guaranteed.
- def Tokens.load dump
- require 'coderay/helpers/gzip_simple'
- dump = dump.gunzip
- @dump = Marshal.load dump
- end
-
- end
-
-
- # = TokenStream
- #
- # The TokenStream class is a fake Array without elements.
- #
- # It redirects the method << to a block given at creation.
- #
- # This allows scanners and Encoders to use streaming (no
- # tokens are saved, the input is highlighted the same time it
- # is scanned) with the same code.
- #
- # See CodeRay.encode_stream and CodeRay.scan_stream
- class TokenStream < Tokens
-
- # Whether the object is a TokenStream.
- #
- # Returns true.
- def stream?
- true
- end
-
- # The Array is empty, but size counts the tokens given by <<.
- attr_reader :size
-
- # Creates a new TokenStream that calls +block+ whenever
- # its << method is called.
- #
- # Example:
- #
- # require 'coderay'
- #
- # token_stream = CodeRay::TokenStream.new do |kind, text|
- # puts 'kind: %s, text size: %d.' % [kind, text.size]
- # end
- #
- # token_stream << [:regexp, '/\d+/']
- # #-> kind: rexpexp, text size: 5.
- #
- def initialize &block
- raise ArgumentError, 'Block expected for streaming.' unless block
- @callback = block
- @size = 0
- end
-
- # Calls +block+ with +token+ and increments size.
- #
- # Returns self.
- def << token
- @callback.call token
- @size += 1
- self
- end
-
- # This method is not implemented due to speed reasons. Use Tokens.
- def text_size
- raise NotImplementedError, 'This method is not implemented due to speed reasons.'
- end
-
- # A TokenStream cannot be dumped. Use Tokens.
- def dump
- raise NotImplementedError, 'A TokenStream cannot be dumped.'
- end
-
- # A TokenStream cannot be optimized. Use Tokens.
- def optimize
- raise NotImplementedError, 'A TokenStream cannot be optimized.'
- end
-
- end
-
-end
+module CodeRay
+
+ # = Tokens
+ #
+ # The Tokens class represents a list of tokens returned from
+ # a Scanner.
+ #
+ # A token is not a special object, just a two-element Array
+ # consisting of
+ # * the _token_ _text_ (the original source of the token in a String)
+ # * the _token_ _kind_ (a Symbol representing the type of the token)
+ #
+ # A token looks like this:
+ #
+ # ['# It looks like this', :comment]
+ # ['3.1415926', :float]
+ # ['äöü', :error]
+ #
+ # Some scanners also yield some kind of sub-tokens, represented by special
+ # token texts, namely :open and :close .
+ #
+ # The Ruby scanner, for example, splits "a string" into:
+ #
+ # [
+ # [:open, :string],
+ # ['"', :delimiter],
+ # ['a string', :content],
+ # ['"', :delimiter],
+ # [:close, :string]
+ # ]
+ #
+ # Tokens is also the interface between Scanners and Encoders:
+ # The input is split and saved into a Tokens object. The Encoder
+ # then builds the output from this object.
+ #
+ # Thus, the syntax below becomes clear:
+ #
+ # CodeRay.scan('price = 2.59', :ruby).html
+ # # the Tokens object is here -------^
+ #
+ # See how small it is? ;)
+ #
+ # Tokens gives you the power to handle pre-scanned code very easily:
+ # You can convert it to a webpage, a YAML file, or dump it into a gzip'ed string
+ # that you put in your DB.
+ #
+ # Tokens' subclass TokenStream allows streaming to save memory.
+ class Tokens < Array
+
+ class << self
+
+ # Convert the token to a string.
+ #
+ # This format is used by Encoders.Tokens.
+ # It can be reverted using read_token.
+ def write_token text, type
+ if text.is_a? String
+ "#{type}\t#{escape(text)}\n"
+ else
+ ":#{text}\t#{type}\t\n"
+ end
+ end
+
+ # Read a token from the string.
+ #
+ # Inversion of write_token.
+ #
+ # TODO Test this!
+ def read_token token
+ type, text = token.split("\t", 2)
+ if type[0] == ?:
+ [text.to_sym, type[1..-1].to_sym]
+ else
+ [type.to_sym, unescape(text)]
+ end
+ end
+
+ # Escapes a string for use in write_token.
+ def escape text
+ text.gsub(/[\n\\]/, '\\\\\&')
+ end
+
+ # Unescapes a string created by escape.
+ def unescape text
+ text.gsub(/\\[\n\\]/) { |m| m[1,1] }
+ end
+
+ end
+
+ # Whether the object is a TokenStream.
+ #
+ # Returns false.
+ def stream?
+ false
+ end
+
+ # Iterates over all tokens.
+ #
+ # If a filter is given, only tokens of that kind are yielded.
+ def each kind_filter = nil, &block
+ unless kind_filter
+ super(&block)
+ else
+ super() do |text, kind|
+ next unless kind == kind_filter
+ yield text, kind
+ end
+ end
+ end
+
+ # Iterates over all text tokens.
+ # Range tokens like [:open, :string] are left out.
+ #
+ # Example:
+ # tokens.each_text_token { |text, kind| text.replace html_escape(text) }
+ def each_text_token
+ each do |text, kind|
+ next unless text.respond_to? :to_str
+ yield text, kind
+ end
+ end
+
+ # Encode the tokens using encoder.
+ #
+ # encoder can be
+ # * a symbol like :html or :statistic
+ # * an Encoder class
+ # * an Encoder object
+ #
+ # options are passed to the encoder.
+ def encode encoder, options = {}
+ unless encoder.is_a? Encoders::Encoder
+ unless encoder.is_a? Class
+ encoder_class = Encoders[encoder]
+ end
+ encoder = encoder_class.new options
+ end
+ encoder.encode_tokens self, options
+ end
+
+
+ # Turn into a string using Encoders::Text.
+ #
+ # +options+ are passed to the encoder if given.
+ def to_s options = {}
+ encode :text, options
+ end
+
+
+ # Redirects unknown methods to encoder calls.
+ #
+ # For example, if you call +tokens.html+, the HTML encoder
+ # is used to highlight the tokens.
+ def method_missing meth, options = {}
+ Encoders[meth].new(options).encode_tokens self
+ end
+
+ # Returns the tokens compressed by joining consecutive
+ # tokens of the same kind.
+ #
+ # This can not be undone, but should yield the same output
+ # in most Encoders. It basically makes the output smaller.
+ #
+ # Combined with dump, it saves space at the cost of time.
+ #
+ # If the scanner is written carefully, this is not required -
+ # for example, consecutive //-comment lines could already be
+ # joined in one comment token by the Scanner.
+ def optimize
+ print ' Tokens#optimize: before: %d - ' % size if $DEBUG
+ last_kind = last_text = nil
+ new = self.class.new
+ each do |text, kind|
+ if text.is_a? String
+ if kind == last_kind
+ last_text << text
+ else
+ new << [last_text, last_kind] if last_kind
+ last_text = text
+ last_kind = kind
+ end
+ else
+ new << [last_text, last_kind] if last_kind
+ last_kind = last_text = nil
+ new << [text, kind]
+ end
+ end
+ new << [last_text, last_kind] if last_kind
+ print 'after: %d (%d saved = %2.0f%%)' %
+ [new.size, size - new.size, 1.0 - (new.size.to_f / size)] if $DEBUG
+ new
+ end
+
+ # Compact the object itself; see optimize.
+ def optimize!
+ replace optimize
+ end
+
+ # Dumps the object into a String that can be saved
+ # in files or databases.
+ #
+ # The dump is created with Marshal.dump;
+ # In addition, it is gzipped using GZip.gzip.
+ #
+ # The returned String object includes Undumping
+ # so it has an #undump method. See Tokens.load.
+ #
+ # You can configure the level of compression,
+ # but the default value 7 should be what you want
+ # in most cases as it is a good compromise between
+ # speed and compression rate.
+ #
+ # See GZip module.
+ def dump gzip_level = 7
+ require 'coderay/helpers/gzip_simple'
+ dump = Marshal.dump self
+ dump = dump.gzip gzip_level
+ dump.extend Undumping
+ end
+
+ # The total size of the tokens.
+ # Should be equal to the input size before
+ # scanning.
+ def text_size
+ map { |t, k| t }.join.size
+ end
+
+ # Include this module to give an object an #undump
+ # method.
+ #
+ # The string returned by Tokens.dump includes Undumping.
+ module Undumping
+ # Calls Tokens.load with itself.
+ def undump
+ Tokens.load self
+ end
+ end
+
+ # Unzip the dump using GZip.gunzip, then
+ # undump the object using Marshal.load.
+ #
+ # The result is commonly a Tokens object, but
+ # this is not guaranteed.
+ def Tokens.load dump
+ require 'coderay/helpers/gzip_simple'
+ dump = dump.gunzip
+ @dump = Marshal.load dump
+ end
+
+ end
+
+
+ # = TokenStream
+ #
+ # The TokenStream class is a fake Array without elements.
+ #
+ # It redirects the method << to a block given at creation.
+ #
+ # This allows scanners and Encoders to use streaming (no
+ # tokens are saved, the input is highlighted at the same time it
+ # is scanned) with the same code.
+ #
+ # See CodeRay.encode_stream and CodeRay.scan_stream
+ class TokenStream < Tokens
+
+ # Whether the object is a TokenStream.
+ #
+ # Returns true.
+ def stream?
+ true
+ end
+
+ # The Array is empty, but size counts the tokens given by <<.
+ attr_reader :size
+
+ # Creates a new TokenStream that calls +block+ whenever
+ # its << method is called.
+ #
+ # Example:
+ #
+ # require 'coderay'
+ #
+ # token_stream = CodeRay::TokenStream.new do |kind, text|
+ # puts 'kind: %s, text size: %d.' % [kind, text.size]
+ # end
+ #
+ # token_stream << [:regexp, '/\d+/']
+ # #-> kind: regexp, text size: 5.
+ #
+ def initialize &block
+ raise ArgumentError, 'Block expected for streaming.' unless block
+ @callback = block
+ @size = 0
+ end
+
+ # Calls +block+ with +token+ and increments size.
+ #
+ # Returns self.
+ def << token
+ @callback.call token
+ @size += 1
+ self
+ end
+
+ # This method is not implemented due to speed reasons. Use Tokens.
+ def text_size
+ raise NotImplementedError, 'This method is not implemented due to speed reasons.'
+ end
+
+ # A TokenStream cannot be dumped. Use Tokens.
+ def dump
+ raise NotImplementedError, 'A TokenStream cannot be dumped.'
+ end
+
+ # A TokenStream cannot be optimized. Use Tokens.
+ def optimize
+ raise NotImplementedError, 'A TokenStream cannot be optimized.'
+ end
+
+ end
+
+end