# path: root/etc/todo/scanners/bash.rb
# blob: d5c5d0f314f81fcf67ee4ec5116fb9ecf9f9f588 (plain)
# author: Vincent Landgraf <setcool@gmx.de>
# licence: GPLv2.1
require "rubygems"
require "coderay"

module CodeRay
  module Scanners
    # CodeRay scanner for Bash shell scripts, registered under +:bash+.
    #
    # Tokens are appended to the token list as <tt>[text, kind]</tt> pairs.
    # Quoted strings are wrapped in <tt>[:open, :string]</tt> /
    # <tt>[:close, :string]</tt> markers, and expansions found inside a
    # string are wrapped in <tt>[:open, :inline]</tt> /
    # <tt>[:close, :inline]</tt> markers.
    class Bash < Scanner
      include CodeRay::Streamable
      register_for :bash

      # Reserved words. The negative lookahead rejects a match that is only
      # the prefix of a longer word (e.g. +do+ in +done+ or in +do-it+).
      KEYWORDS = Regexp.new('(%s)(?![a-zA-Z0-9_-])' % %w{
        if fi until while done for do case in esac select
        break else then shift function
      }.sort.join('|'))

      # Shell builtin commands, matched with the same word-end lookahead.
      BUILTIN = Regexp.new('(%s)(?![a-zA-Z0-9_-])' % %w{
        cd continue eval exec true false suspend unalias
        exit export getopts hash pwd readonly return test
        times trap umask unset alias bind builtin caller
        command declare echo enable help let local logout
        printf read shopt source type typeset ulimit
        set dirs popd pushd bg fg jobs kill wait disown
      }.sort.join('|'))

      # Names of variables set or used by the shell, matched with the same
      # word-end lookahead.
      GLOBAL_VARIABLES = Regexp.new('(%s)(?![a-zA-Z0-9_-])' % %w{
        CDPATH HOME IFS MAIL MAILPATH OPTARG LINENO LINES
        OPTIND PATH PS1 PS2 BASH BASH_ARGC BASH_ARGV
        BASH_COMMAND BASH_ENV BASH_EXECUTION_STRING
        BASH_LINENO BASH_REMATCH BASH_SOURCE COLUMNS
        BASH_SUBSHELL BASH_VERSINFO BASH_VERSION OSTYPE
        COMP_CWORD COMP_LINE COMP_POINT COMP_WORDBREAKS
        COMP_WORDS COMPREPLY DIRSTACK EMACS EUID OPTERR
        FCEDIT FIGNORE FUNCNAME GLOBIGNORE GROUPS OLDPWD
        histchars HISTCMD HISTCONTROL HISTFILE MACHTYPE
        HISTFILESIZE HISTIGNORE HISTSIZE HISTTIMEFORMAT
        HOSTFILE HOSTNAME HOSTTYPE IGNOREEOF INPUTRC LANG
        LC_ALL LC_COLLATE LC_CTYPE LC_MESSAGES LC_NUMERIC
        PIPESTATUS POSIXLY_CORRECT MAILCHECK PPID PS3 PS4
        PROMPT_COMMAND PWD RANDOM REPLY SECONDS SHELL
        SHELLOPTS SHLVL TIMEFORMAT TMOUT TMPDIR UID
      }.sort.join('|'))

      # Plain expansion such as +$name+.
      VARIABLE_SIMPLE = /\$[a-zA-Z]\w*/

      # Braced expansion such as <tt>${name}</tt>, <tt>${#name}</tt> or
      # <tt>${!name}</tt>; everything up to the first closing brace is taken.
      VARIABLE_EXPRESSION = /\$\{[!#]?[a-zA-Z].*?\}/

      # Either expansion form. Built once here instead of being interpolated
      # into a fresh Regexp on every pass through the scan loop.
      VARIABLE = Regexp.union(VARIABLE_SIMPLE, VARIABLE_EXPRESSION)

      # Single-character special parameters: <tt>$@ $# $? $- $$ $! $_</tt>
      # and the positional parameters <tt>$0</tt>..<tt>$9</tt>.
      CONSTANT = /\$[@#?\-$!_0-9]/

      # Tokenizes the scanner's input and appends the tokens to +tokens+.
      #
      # tokens::  token list; must respond to <tt><<</tt>, +empty?+ and +last+.
      # options:: accepted for interface compatibility; not read here.
      #
      # Returns +tokens+. Raises RuntimeError if the state machine reaches a
      # branch that should be impossible.
      def scan_tokens(tokens, options)
        state = :initial
        str_delimiter = nil
        # Patterns that depend on the quote character of the string being
        # scanned; they are rebuilt only when a string is opened.
        str_escape = str_content = str_close = nil

        until eos?
          if state == :initial
            if match = scan(CONSTANT)
              tokens << [match, :constant]
            elsif match = scan(VARIABLE)
              tokens << [match, :instance_variable]
            elsif match = scan(/\s+/)
              tokens << [match, :space]
            elsif match = scan(/-[a-zA-Z]\w*(=\w*)?/)
              tokens << [match, :argument]
            elsif match = scan(/[;<>~]|[&]{1,2}|[|]{1,2}|\*/)
              tokens << [match, :operator]
            elsif match = scan(/\d+/)
              # \d+ rather than [1-9]\d* so that a plain 0 (as in `exit 0`)
              # is a number too.
              tokens << [match, :number]
            elsif (tokens.empty? || tokens.last[1] != :escape) &&
                  (str_delimiter = scan(/["'`]/))
              # A quote only opens a string when the previous token is not a
              # backslash escape.
              tokens << [:open, :string]
              tokens << [str_delimiter, :delimiter]
              str_escape  = /[\\][abefnrtv\\#{str_delimiter}]/
              str_content = /[^\n#{str_delimiter}\\][^\n#{str_delimiter}\$\\]*/
              str_close   = Regexp.new(Regexp.escape(str_delimiter))
              state = :string
            elsif match = scan(/\\/)
              tokens << [match, :escape]
            elsif match = scan(KEYWORDS)
              tokens << [match, :reserved]
            elsif match = scan(BUILTIN)
              tokens << [match, :method]
            elsif match = scan(GLOBAL_VARIABLES)
              tokens << [match, :global_variable]
            elsif match = scan(/[a-zA-Z]\w*/)
              tokens << [match, :ident]
            elsif match = scan(/\#!.*/) # shebang, up to the end of the line
              tokens << [match, :doctype]
            elsif match = scan(/\#.*/) # comment, up to the end of the line
              tokens << [match, :comment]
            else
              # Anything not recognised above is passed through one
              # character at a time.
              tokens << [getch, :other]
            end
          elsif state == :string
            if match = scan(str_escape)
              tokens << [match, :escape]
            elsif match = scan(CONSTANT)
              tokens << [:open, :inline]
              tokens << [match, :constant]
              tokens << [:close, :inline]
            elsif match = scan(VARIABLE)
              tokens << [:open, :inline]
              tokens << [match, :instance_variable]
              tokens << [:close, :inline]
            elsif match = scan(str_content)
              tokens << [match, :content]
            elsif match = scan(str_close)
              tokens << [match, :delimiter]
              tokens << [:close, :string]
              state = :initial
            elsif match = scan(/\n/)
              # A line break ends the string. The newline itself is still
              # emitted so that no input text is dropped from the output.
              tokens << [:close, :string]
              tokens << [match, :space]
              state = :initial
            elsif match = scan(/\\.?/)
              # Backslash that does not start one of the escapes above
              # (e.g. \$ or \d, or a backslash at the end of a line or of the
              # input). It is taken together with the following character
              # as plain content, so that character is not re-read as the
              # start of an expansion.
              tokens << [match, :content]
            else
              raise 'String: else-case reached'
            end
          else
            raise 'else-case reached'
          end
        end

        # Input ended inside a string: balance the [:open, :string] marker.
        tokens << [:close, :string] if state == :string

        tokens
      end
    end
  end
end