summaryrefslogtreecommitdiff
path: root/lib/coderay/scanners/ruby/patterns.rb
diff options
context:
space:
mode:
authormurphy <murphy@rubychan.de>2005-12-10 04:20:30 +0000
committermurphy <murphy@rubychan.de>2005-12-10 04:20:30 +0000
commitc1b6c6339f602b8ecf69c8cb330319d99e039063 (patch)
tree4ba62de2ff5a2ac7e0f13b5622b0044c45f684f9 /lib/coderay/scanners/ruby/patterns.rb
parent7c96846e16ae60d8797ef37d83cb6b0f0553ef2e (diff)
downloadcoderay-c1b6c6339f602b8ecf69c8cb330319d99e039063.tar.gz
Plugin System extended:
- helper method - path names are expanded encoders/html.rb, div.rb, span.rb, ruby.rb: - using new methods (fixes problems with rdoc and test tasks): renamed scanners/ruby/helper.rb to patterns.rb encoders/statistic.rb: sorting made deterministic (for unit testing)
Diffstat (limited to 'lib/coderay/scanners/ruby/patterns.rb')
-rw-r--r--lib/coderay/scanners/ruby/patterns.rb213
1 files changed, 213 insertions, 0 deletions
diff --git a/lib/coderay/scanners/ruby/patterns.rb b/lib/coderay/scanners/ruby/patterns.rb
new file mode 100644
index 0000000..ad649da
--- /dev/null
+++ b/lib/coderay/scanners/ruby/patterns.rb
@@ -0,0 +1,213 @@
+module CodeRay module Scanners
+
+ class Ruby
+
+ RESERVED_WORDS = %w[
+ and def end in or unless begin
+ defined? ensure module redo super until
+ BEGIN break do next rescue then
+ when END case else for retry
+ while alias class elsif if not return
+ undef yield
+ ]
+
+ DEF_KEYWORDS = %w[ def ]
+ UNDEF_KEYWORDS = %w[ undef ]
+ MODULE_KEYWORDS = %w[class module]
+ DEF_NEW_STATE = WordList.new(:initial).
+ add(DEF_KEYWORDS, :def_expected).
+ add(UNDEF_KEYWORDS, :undef_expected).
+ add(MODULE_KEYWORDS, :module_expected)
+
+ IDENTS_ALLOWING_REGEXP = %w[
+ and or not while until unless if then elsif when sub sub! gsub gsub! scan slice slice! split
+ ]
+ REGEXP_ALLOWED = WordList.new(false).
+ add(IDENTS_ALLOWING_REGEXP, :set)
+
+ PREDEFINED_CONSTANTS = %w[
+ nil true false self
+ DATA ARGV ARGF __FILE__ __LINE__
+ ]
+
+ IDENT_KIND = WordList.new(:ident).
+ add(RESERVED_WORDS, :reserved).
+ add(PREDEFINED_CONSTANTS, :pre_constant)
+
+ IDENT = /[a-z_][\w_]*/i
+
+ METHOD_NAME = / #{IDENT} [?!]? /ox
+ METHOD_NAME_OPERATOR = /
+ \*\*? # multiplication and power
+ | [-+]@? # plus, minus
+ | [\/%&|^`~] # division, modulo or format strings, &and, |or, ^xor, `system`, tilde
+ | \[\]=? # array getter and setter
+ | << | >> # append or shift left, shift right
+ | <=?>? | >=? # comparison, rocket operator
+ | ===? # simple equality and case equality
+ /ox
+ METHOD_NAME_EX = / #{IDENT} [?!=]? | #{METHOD_NAME_OPERATOR} /ox
+ INSTANCE_VARIABLE = / @ #{IDENT} /ox
+ CLASS_VARIABLE = / @@ #{IDENT} /ox
+ OBJECT_VARIABLE = / @@? #{IDENT} /ox
+ GLOBAL_VARIABLE = / \$ (?: #{IDENT} | [1-9]\d* | 0\w* | [~&+`'=\/,;_.<>!@$?*":\\] | -[a-zA-Z_0-9] ) /ox
+ PREFIX_VARIABLE = / #{GLOBAL_VARIABLE} |#{OBJECT_VARIABLE} /ox
+ VARIABLE = / @?@? #{IDENT} | #{GLOBAL_VARIABLE} /ox
+
+ QUOTE_TO_TYPE = {
+ '`' => :shell,
+ '/'=> :regexp,
+ }
+ QUOTE_TO_TYPE.default = :string
+
+ REGEXP_MODIFIERS = /[mixounse]*/
+ REGEXP_SYMBOLS = /[|?*+?(){}\[\].^$]/
+
+ DECIMAL = /\d+(?:_\d+)*/
+ OCTAL = /0_?[0-7]+(?:_[0-7]+)*/
+ HEXADECIMAL = /0x[0-9A-Fa-f]+(?:_[0-9A-Fa-f]+)*/
+ BINARY = /0b[01]+(?:_[01]+)*/
+
+ EXPONENT = / [eE] [+-]? #{DECIMAL} /ox
+ FLOAT_SUFFIX = / #{EXPONENT} | \. #{DECIMAL} #{EXPONENT}? /ox
+ FLOAT_OR_INT = / #{DECIMAL} (?: #{FLOAT_SUFFIX} () )? /ox
+ NUMERIC = / (?=0) (?: #{OCTAL} | #{HEXADECIMAL} | #{BINARY} ) | #{FLOAT_OR_INT} /ox
+
+ SYMBOL = /
+ :
+ (?:
+ #{METHOD_NAME_EX}
+ | #{PREFIX_VARIABLE}
+ | ['"]
+ )
+ /ox
+
+ # TODO investigste \M, \c and \C escape sequences
+ # (?: M-\\C-|C-\\M-|M-\\c|c\\M-|c|C-|M-)? (?: \\ (?: [0-7]{3} | x[0-9A-Fa-f]{2} | . ) )
+ # assert_equal(225, ?\M-a)
+ # assert_equal(129, ?\M-\C-a)
+ ESCAPE = /
+ [abefnrstv]
+ | M-\\C-|C-\\M-|M-\\c|c\\M-|c|C-|M-
+ | [0-7]{1,3}
+ | x[0-9A-Fa-f]{1,2}
+ | .
+ /mx
+
+ CHARACTER = /
+ \?
+ (?:
+ [^\s\\]
+ | \\ #{ESCAPE}
+ )
+ /mx
+
+ # NOTE: This is not completely correct, but
+ # nobody needs heredoc delimiters ending with \n.
+ HEREDOC_OPEN = /
+ << (-)? # $1 = float
+ (?:
+ ( [A-Za-z_0-9]+ ) # $2 = delim
+ |
+ ( ["'`] ) # $3 = quote, type
+ ( [^\n]*? ) \3 # $4 = delim
+ )
+ /mx
+
+ RUBYDOC = /
+ =begin (?!\S)
+ .*?
+ (?: \Z | ^=end (?!\S) [^\n]* )
+ /mx
+
+ DATA = /
+ __END__$
+ .*?
+ (?: \Z | (?=^\#CODE) )
+ /mx
+
+ RUBYDOC_OR_DATA = / #{RUBYDOC} | #{DATA} /xo
+
+ RDOC_DATA_START = / ^=begin (?!\S) | ^__END__$ /x
+
+ FANCY_START = / % ( [qQwWxsr] | (?![\w\s=]) ) (.) /mox
+
+ FancyStringType = {
+ 'q' => [:string, false],
+ 'Q' => [:string, true],
+ 'r' => [:regexp, true],
+ 's' => [:symbol, false],
+ 'x' => [:shell, true],
+ 'w' => [:string, :word],
+ 'W' => [:string, :word],
+ }
+ FancyStringType['w'] = FancyStringType['q']
+ FancyStringType['W'] = FancyStringType[''] = FancyStringType['Q']
+
+ class StringState < Struct.new :type, :interpreted, :delim, :heredoc,
+ :paren, :paren_depth, :pattern, :next_state
+
+ CLOSING_PAREN = Hash[ *%w[
+ ( )
+ [ ]
+ < >
+ { }
+ ] ]
+
+ CLOSING_PAREN.values.each { |o| o.freeze } # debug, if I try to change it with <<
+ OPENING_PAREN = CLOSING_PAREN.invert
+
+ STRING_PATTERN = Hash.new { |h, k|
+ delim, interpreted = *k
+ delim_pattern = Regexp.escape(delim.dup)
+ if starter = OPENING_PAREN[delim]
+ delim_pattern << Regexp.escape(starter)
+ end
+
+
+ special_escapes =
+ case interpreted
+ when :regexp_symbols
+ '| ' + REGEXP_SYMBOLS.source
+ when :words
+ '| \s'
+ end
+
+ h[k] =
+ if interpreted and not delim == '#'
+ / (?= [#{delim_pattern}\\] | \# [{$@] #{special_escapes} ) /mx
+ else
+ / (?= [#{delim_pattern}\\] #{special_escapes} ) /mx
+ end
+ }
+
+ HEREDOC_PATTERN = Hash.new { |h, k|
+ delim, interpreted, indented = *k
+ delim_pattern = Regexp.escape(delim.dup)
+ delim_pattern = / \n #{ '(?>[\ \t]*)' if indented } #{ Regexp.new delim_pattern } $ /x
+ h[k] =
+ if interpreted
+ / (?= #{delim_pattern}() | \\ | \# [{$@] ) /mx # $1 set == end of heredoc
+ else
+ / (?= #{delim_pattern}() | \\ ) /mx
+ end
+ }
+
+ def initialize kind, interpreted, delim, heredoc = false
+ if paren = CLOSING_PAREN[delim]
+ delim, paren = paren, delim
+ paren_depth = 1
+ end
+ if heredoc
+ pattern = HEREDOC_PATTERN[ [delim, interpreted, heredoc == :indented] ]
+ delim = nil
+ else
+ pattern = STRING_PATTERN[ [delim, interpreted] ]
+ end
+ super kind, interpreted, delim, heredoc, paren, paren_depth, pattern, :initial
+ end
+ end unless defined? StringState
+
+ end
+
+end end