From abb92f30b12e11781afa76f43a344627520b5b34 Mon Sep 17 00:00:00 2001 From: Eric Guo Date: Sun, 8 Jul 2012 14:32:28 +0800 Subject: New: *Go Encoder* Draft version, copy from c --- lib/coderay/helpers/file_type.rb | 37 ++++---- lib/coderay/scanners/go.rb | 195 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 214 insertions(+), 18 deletions(-) create mode 100644 lib/coderay/scanners/go.rb diff --git a/lib/coderay/helpers/file_type.rb b/lib/coderay/helpers/file_type.rb index 637001b..5159054 100644 --- a/lib/coderay/helpers/file_type.rb +++ b/lib/coderay/helpers/file_type.rb @@ -1,5 +1,5 @@ module CodeRay - + # = FileType # # A simple filetype recognizer. @@ -8,18 +8,18 @@ module CodeRay # # # determine the type of the given # lang = FileType[file_name] - # + # # # return :text if the file type is unknown # lang = FileType.fetch file_name, :text - # + # # # try the shebang line, too # lang = FileType.fetch file_name, :text, true module FileType - + UnknownFileType = Class.new Exception - + class << self - + # Try to determine the file type of the file. # # +filename+ is a relative or absolute path to a file. @@ -30,7 +30,7 @@ module CodeRay name = File.basename filename ext = File.extname(name).sub(/^\./, '') # from last dot, delete the leading dot ext2 = filename.to_s[/\.(.*)/, 1] # from first dot - + type = TypeFromExt[ext] || TypeFromExt[ext.downcase] || @@ -39,10 +39,10 @@ module CodeRay TypeFromName[name] || TypeFromName[name.downcase] type ||= shebang(filename) if read_shebang - + type end - + # This works like Hash#fetch. # # If the filetype cannot be found, the +default+ value @@ -51,7 +51,7 @@ module CodeRay if default && block_given? warn 'Block supersedes default value argument; use either.' end - + if type = self[filename, read_shebang] type else @@ -60,9 +60,9 @@ module CodeRay raise UnknownFileType, 'Could not determine type of %p.' % filename end end - + protected - + def shebang filename return unless File.exist? filename File.open filename, 'r' do |f| @@ -73,9 +73,9 @@ module CodeRay end end end - + end - + TypeFromExt = { 'c' => :c, 'cfc' => :xml, @@ -86,6 +86,7 @@ module CodeRay 'dpr' => :delphi, 'erb' => :erb, 'gemspec' => :ruby, + 'go' => :go, 'groovy' => :groovy, 'gvy' => :groovy, 'h' => :c, @@ -128,16 +129,16 @@ module CodeRay for cpp_alias in %w[cc cpp cp cxx c++ C hh hpp h++ cu] TypeFromExt[cpp_alias] = :cpp end - + TypeFromShebang = /\b(?:ruby|perl|python|sh)\b/ - + TypeFromName = { 'Capfile' => :ruby, 'Rakefile' => :ruby, 'Rantfile' => :ruby, 'Gemfile' => :ruby, } - + end - + end diff --git a/lib/coderay/scanners/go.rb b/lib/coderay/scanners/go.rb new file mode 100644 index 0000000..4431ef2 --- /dev/null +++ b/lib/coderay/scanners/go.rb @@ -0,0 +1,195 @@ +module CodeRay +module Scanners + + # Scanner for Go, copy from c + class Go < Scanner + + register_for :go + file_extension 'go' + + # http://golang.org/ref/spec#Keywords + KEYWORDS = [ + 'break', 'default', 'func', 'interface', 'select', + 'case', 'defer', 'go', 'map', 'struct', + 'chan', 'else', 'goto', 'package', 'switch', + 'const', 'fallthrough', 'if', 'range', 'type', + 'continue', 'for', 'import', 'return', 'var', + ] # :nodoc: + + # http://golang.org/ref/spec#Types + PREDEFINED_TYPES = [ + 'bool', + 'uint8', 'uint16', 'uint32', 'uint64', + 'int8', 'int16', 'int32', 'int64', + 'float32', 'float64', + 'complex64', 'complex128', + 'byte', 'rune', + 'uint', 'int', 'uintptr', + ] # :nodoc: + + PREDEFINED_CONSTANTS = [ + 'nil', 'iota', + 'true', 'false', + ] # :nodoc: + + DIRECTIVES = [ + 'go_no_directive', # Seems no directive concept in Go? + ] # :nodoc: + + IDENT_KIND = WordList.new(:ident). + add(KEYWORDS, :keyword). + add(PREDEFINED_TYPES, :predefined_type). + add(DIRECTIVES, :directive). + add(PREDEFINED_CONSTANTS, :predefined_constant) # :nodoc: + + ESCAPE = / [rbfntv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x # :nodoc: + UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x # :nodoc: + + protected + + def scan_tokens encoder, options + + state = :initial + label_expected = true + case_expected = false + label_expected_before_preproc_line = nil + in_preproc_line = false + + until eos? + + case state + + when :initial + + if match = scan(/ \s+ | \\\n /x) + if in_preproc_line && match != "\\\n" && match.index(?\n) + in_preproc_line = false + label_expected = label_expected_before_preproc_line + end + encoder.text_token match, :space + + elsif match = scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx) + encoder.text_token match, :comment + + elsif match = scan(/ [-+*=<>?:;,!&^|()\[\]{}~%]+ | \/=? | \.(?!\d) /x) + label_expected = match =~ /[;\{\}]/ + if case_expected + label_expected = true if match == ':' + case_expected = false + end + encoder.text_token match, :operator + + elsif match = scan(/ [A-Za-z_][A-Za-z_0-9]* /x) + kind = IDENT_KIND[match] + if kind == :ident && label_expected && !in_preproc_line && scan(/:(?!:)/) + kind = :label + match << matched + else + label_expected = false + if kind == :keyword + case match + when 'case', 'default' + case_expected = true + end + end + end + encoder.text_token match, kind + + elsif match = scan(/L?"/) + encoder.begin_group :string + if match[0] == ?L + encoder.text_token 'L', :modifier + match = '"' + end + encoder.text_token match, :delimiter + state = :string + + elsif match = scan(/ \# \s* if \s* 0 /x) + match << scan_until(/ ^\# (?:elif|else|endif) .*? $ | \z /xm) unless eos? + encoder.text_token match, :comment + + elsif match = scan(/#[ \t]*(\w*)/) + encoder.text_token match, :preprocessor + in_preproc_line = true + label_expected_before_preproc_line = label_expected + state = :include_expected if self[1] == 'include' + + elsif match = scan(/ L?' (?: [^\'\n\\] | \\ #{ESCAPE} )? '? /ox) + label_expected = false + encoder.text_token match, :char + + elsif match = scan(/\$/) + encoder.text_token match, :ident + + elsif match = scan(/0[xX][0-9A-Fa-f]+/) + label_expected = false + encoder.text_token match, :hex + + elsif match = scan(/(?:0[0-7]+)(?![89.eEfF])/) + label_expected = false + encoder.text_token match, :octal + + elsif match = scan(/(?:\d+)(?![.eEfF])L?L?/) + label_expected = false + encoder.text_token match, :integer + + elsif match = scan(/\d[fF]?|\d*\.\d+(?:[eE][+-]?\d+)?[fF]?|\d+[eE][+-]?\d+[fF]?/) + label_expected = false + encoder.text_token match, :float + + else + encoder.text_token getch, :error + + end + + when :string + if match = scan(/[^\\\n"]+/) + encoder.text_token match, :content + elsif match = scan(/"/) + encoder.text_token match, :delimiter + encoder.end_group :string + state = :initial + label_expected = false + elsif match = scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox) + encoder.text_token match, :char + elsif match = scan(/ \\ | $ /x) + encoder.end_group :string + encoder.text_token match, :error + state = :initial + label_expected = false + else + raise_inspect "else case \" reached; %p not handled." % peek(1), encoder + end + + when :include_expected + if match = scan(/<[^>\n]+>?|"[^"\n\\]*(?:\\.[^"\n\\]*)*"?/) + encoder.text_token match, :include + state = :initial + + elsif match = scan(/\s+/) + encoder.text_token match, :space + state = :initial if match.index ?\n + + else + state = :initial + + end + + else + raise_inspect 'Unknown state', encoder + + end + + end + + if state == :string + encoder.end_group :string + end + + encoder + end + + end + +end +end -- cgit v1.2.1