diff options
Diffstat (limited to 'lib')
| -rw-r--r-- | lib/coderay/helpers/file_type.rb | 5 | ||||
| -rw-r--r-- | lib/coderay/scanners/_map.rb | 12 | ||||
| -rw-r--r-- | lib/coderay/scanners/php.rb | 316 | ||||
| -rw-r--r-- | lib/coderay/scanners/sql.rb | 177 | 
4 files changed, 506 insertions, 4 deletions
diff --git a/lib/coderay/helpers/file_type.rb b/lib/coderay/helpers/file_type.rb
index 7f472d6..3b57608 100644
--- a/lib/coderay/helpers/file_type.rb
+++ b/lib/coderay/helpers/file_type.rb
@@ -96,6 +96,10 @@ module FileType
     'json' => :json,
     'mab' => :ruby,
     'patch' => :diff,
+    'php' => :php,
+    'php3' => :php,
+    'php4' => :php,
+    'php5' => :php,
     'py' => :python,
     'py3' => :python,
     'pyw' => :python,
@@ -105,6 +109,7 @@ module FileType
     'rbw' => :ruby,
     'rhtml' => :rhtml,
     'sch' => :scheme,
+    'sql' => :sql,
     'ss' => :scheme,
     'xhtml' => :xhtml,
     'xml' => :xml,
diff --git a/lib/coderay/scanners/_map.rb b/lib/coderay/scanners/_map.rb
index c9ac75f..9f08d7d 100644
--- a/lib/coderay/scanners/_map.rb
+++ b/lib/coderay/scanners/_map.rb
@@ -1,14 +1,18 @@
 module CodeRay
 module Scanners
 
-  map :cpp => :c,
-    :plain => :plaintext,
-    :pascal => :delphi,
+  map \
+    :cpp => :c,
+    :ecma => :java_script,
+    :ecmascript => :java_script,
+    :ecma_script => :java_script,
     :irb => :ruby,
-    :xhtml => :nitro_xhtml,
     :javascript => :java_script,
     :js => :java_script,
     :nitro => :nitro_xhtml,
+    :pascal => :delphi,
+    :plain => :plaintext,
+    :xhtml => :html,
     :yml => :yaml
 
   default :plain
diff --git a/lib/coderay/scanners/php.rb b/lib/coderay/scanners/php.rb
new file mode 100644
index 0000000..0912ea5
--- /dev/null
+++ b/lib/coderay/scanners/php.rb
@@ -0,0 +1,316 @@
+# NOTE: these operators are added to the core Regexp class, so they are
+# visible to every Regexp in the process, not only to this scanner.
+class Regexp
+  # Alternation: a Regexp matching what either +self+ or +other+ matches.
+  def |(other)
+    Regexp.union(self, other)
+  end
+
+  # Concatenation: a Regexp matching +self+ directly followed by +other+.
+  def +(other)
+    /#{self}#{other}/
+  end
+end
+
+module CodeRay
+module Scanners
+
+  load :html
+
+  # Scanner for PHP code, optionally embedded in HTML.
+  #
+  # Text between a PHP end tag and the next PHP start tag is passed to a
+  # nested HTML scanner that is given this scanner's token list.
+  #
+  # TODO: Complete rewrite. This scanner is buggy.
+  class PHP < Scanner
+
+    register_for :php
+    file_extension 'php'
+
+    # Creates the nested HTML scanner used for text outside of PHP tags.
+    def setup
+      @html_scanner = CodeRay.scanner :html, :tokens => @tokens, :keep_tokens => true, :keep_state => true
+    end
+
+    # Resets this scanner and the nested HTML scanner.
+    def reset_instance
+      super
+      @html_scanner.reset
+    end
+
+    # Word lists used to classify identifiers.
+    module Words
+      ControlKeywords = %w!
+        if else elseif while do for switch case default declare foreach as
+        endif endwhile endfor endforeach endswitch enddeclare return break
+        continue exit die try catch throw
+      !
+      OtherKeywords = %w!
+        function class extends implements instanceof parent self var const
+        private public protected static abstract final global new echo include
+        require include_once require_once eval print use unset isset empty
+        interface list array clone null true false
+      !
+
+      SpecialConstants = %w! __LINE__ __FILE__ __CLASS__
+        __METHOD__ __FUNCTION__
+      !
+      IdentKinds = WordList.new(:ident).
+        add(ControlKeywords, :reserved).
+        add(OtherKeywords, :pre_type).
+        add(SpecialConstants, :pre_constant)
+    end
+
+    # Regular expressions for the lexical elements of PHP.
+    module RE
+      # Builds a case-insensitive Regexp matching any of the literal strings.
+      def self.build_alternatives(array)
+        Regexp.new(array.map { |s| Regexp.escape(s) }.join('|') , Regexp::IGNORECASE)
+      end
+
+      # In /x mode literal blanks are ignored, hence the explicit \s+.
+      PHPStart = /
+        <script\s+language="php"> |
+        <script\s+language='php'> |
+        <\?php                   |
+        <\?(?!xml)               |
+        <%
+      /xi
+
+      PHPEnd = %r!
+        </script> |
+        \?>        |
+        %>
+      !xi
+
+      IChar = /[a-z0-9_\x80-\xFF]/i
+      IStart = /[a-z_\x80-\xFF]/i
+      Identifier = /#{IStart}#{IChar}*/
+      Variable = /\$#{Identifier}/
+
+      Typecasts = build_alternatives %w!
+        float double real int integer bool boolean string array object null
+      !.map{|s| "(#{s})"}
+      OneLineComment1 = %r!//.*?(?=#{PHPEnd}|$)!
+      OneLineComment2 = %r!#.*?(?=#{PHPEnd}|$)!
+      OneLineComment = OneLineComment1 | OneLineComment2
+
+      HereDoc = /<<</ + Identifier
+
+      binops = %w!
+        + - * / << >> & | ^ . %
+      !
+
+      ComparisionOperator = build_alternatives %w$
+        === !== == != <= >=
+      $
+      IncDecOperator = build_alternatives %w! ++ -- !
+
+      BinaryOperator = build_alternatives binops
+      # Compound assignment: each binary operator followed by "=".
+      AssignOperator = build_alternatives binops.map { |s| "#{s}=" }
+      LogicalOperator = build_alternatives %w! and or xor not !
+      ObjectOperator = build_alternatives %w! -> :: !
+      OtherOperator = build_alternatives %w$ => = ? : [ ] ( ) ; , ~ ! @ > <$
+
+      Operator = ComparisionOperator | IncDecOperator | LogicalOperator |
+        ObjectOperator | AssignOperator | BinaryOperator | OtherOperator
+
+      S = /\s+/
+
+      Integer = /-?0x[0-9a-fA-F]+/ | /-?\d+/
+      # Decimal floats; the exponent sign is optional.
+      Float = /-?(?:\d+\.\d*|\d*\.\d+)(?:[eE][+-]?\d+)?/ | /-?\d+[eE][+-]?\d+/
+
+    end
+
+    # Tokenizes the input, appending [text, kind] pairs to +tokens+.
+    #
+    # +states+ is a stack: :php is the base state, :html is pushed at a PHP
+    # end tag and popped at the next start tag, and a nested :php is pushed
+    # for an opening brace. Returns +tokens+.
+    def scan_tokens tokens, options
+      states = [:php]
+      heredocdelim = nil
+
+      until eos?
+
+        match = nil
+        kind = nil
+
+        case states.last
+        when :html
+          if scan RE::PHPStart
+            kind = :delimiter
+            states.pop
+          else
+            match = scan_until(/(?=#{RE::PHPStart})/o) || scan_until(/\z/)
+            @html_scanner.tokenize match if not match.empty?
+            # The text went to the HTML scanner, which was set up with this
+            # scanner's token list; do not append an empty token for it.
+            next
+          end
+
+        when :php
+          if scan RE::PHPEnd
+            kind = :delimiter
+            states.push :html
+
+          elsif scan RE::PHPStart
+            # A start tag met while already in PHP mode (the base state is
+            # :php), e.g. the "<?php" at the top of a file.
+            kind = :delimiter
+
+          elsif scan RE::S
+            kind = :space
+
+          elsif scan(/\/\*/)
+            kind = :comment
+            states.push :mlcomment
+
+          elsif scan RE::OneLineComment
+            kind = :comment
+
+          elsif match = scan(RE::Identifier)
+            kind = Words::IdentKinds[match]
+            if kind == :ident && check(/:(?!:)/) #&& tokens[-2][0] == 'case'
+#             match << scan(/:/)
+              kind = :label
+            elsif kind == :ident and match =~ /^[A-Z]/
+              kind = :constant
+            end
+
+          elsif scan RE::Float
+            # Tried before Integer so that "1.5" is not split at the dot.
+            kind = :float
+
+          elsif scan RE::Integer
+            kind = :integer
+
+          elsif scan(/'/)
+            kind = :delimiter
+            states.push :sqstring
+
+          elsif scan(/"/)
+            kind = :delimiter
+            states.push :dqstring
+
+          elsif match = scan(RE::HereDoc)
+            heredocdelim = match[RE::Identifier]
+            kind = :delimiter
+            # states.push :heredocstring
+
+          elsif scan RE::Variable
+            kind = :local_variable
+
+          elsif scan(/\{/)
+            kind = :operator
+            states.push :php
+
+          elsif scan(/\}/)
+            if states.length == 1
+              kind = :error
+            else
+              kind = :operator
+              states.pop
+            end
+
+          elsif scan RE::Operator
+            kind = :operator
+
+          else
+            getch
+            kind = :error
+
+          end
+
+        when :mlcomment
+          if scan(/(?:[^\n\r\f*]|\*(?!\/))+/)
+            kind = :comment
+
+          elsif scan(/\*\//)
+            kind = :comment
+            states.pop
+
+          elsif scan(/[\r\n\f]+/)
+            kind = :space
+          end
+
+        when :sqstring
+          if scan(/[^\r\n\f'\\]+/)
+            kind = :string
+          elsif match = scan(/\\\\|\\'/)
+            kind = :char
+          elsif scan(/\\/)
+            kind = :string
+          elsif scan(/[\r\n\f ]+/)
+            kind = :space
+          elsif scan(/'/)
+            kind = :delimiter
+            states.pop
+          end
+
+        when :dqstring
+#todo: $foo[bar] kind of stuff
+          if scan(/[^\r\n\f"${\\]+/)
+            kind = :string
+          elsif scan(/\\x[0-9a-fA-F]{1,2}/)
+            # \xHH, \OOO and the single-character escapes follow the PHP manual.
+            kind = :char
+          elsif scan(/\\[0-7]{1,3}/)
+            kind = :char
+          elsif scan(/\\["\\$efnrtv]/)
+            kind = :char
+          elsif scan(/\\/)
+            kind = :string
+          elsif scan(/[\r\n\f]+/)
+            kind = :space
+          elsif match = scan(/#{RE::Variable}/o)
+            kind = :local_variable
+            if check(/\[#{RE::Identifier}\]/o)
+              match << scan(/\[#{RE::Identifier}\]/o)
+            elsif check(/\[/)
+              match << scan(/\[#{RE::Identifier}?/o)
+              kind = :error
+            elsif check(/->#{RE::Identifier}/o)
+              match << scan(/->#{RE::Identifier}/o)
+            end
+          elsif scan(/\{/)
+            if check(/\$/)
+              kind = :operator
+              states.push :php
+            else
+              kind = :string
+            end
+            match = '{'
+          elsif scan(/\$\{#{RE::Identifier}\}/o)
+            kind = :local_variable
+          elsif scan(/\$/)
+            kind = :string
+          elsif scan(/"/)
+            kind = :delimiter
+            states.pop
+          end
+        else
+          raise_inspect 'Unknown state!', tokens, states
+        end
+
+        match ||= matched
+        if $DEBUG and not kind
+          raise_inspect 'Error token %p in line %d' %
+            [[match, kind], line], tokens, states
+        end
+        raise_inspect 'Empty token', tokens, states unless match
+
+        tokens << [match, kind]
+
+      end
+      tokens
+
+    end
+
+  end
+
+end
+end
diff --git a/lib/coderay/scanners/sql.rb b/lib/coderay/scanners/sql.rb
new file mode 100644
index 0000000..9ba0b4d
--- /dev/null
+++ b/lib/coderay/scanners/sql.rb
@@ -0,0 +1,177 @@
+module CodeRay module Scanners
+
+  # Scanner for SQL.
+  #
+  # by Josh Goebel
+  class SQL < Scanner
+
+    register_for :sql
+
+    RESERVED_WORDS = %w(
+      create table index trigger drop primary key set select
+      insert update delete replace into
+      on from values before and or if exists case when
+      then else as group order by avg where
+      join inner outer union engine not
+      like end using collate show columns begin
+    )
+
+    PREDEFINED_TYPES = %w(
+      char varchar enum binary text tinytext mediumtext
+      longtext blob tinyblob mediumblob longblob timestamp
+      date time datetime year double decimal float int
+      integer tinyint mediumint bigint smallint unsigned bit
+      bool boolean hex bin oct
+    )
+
+    PREDEFINED_FUNCTIONS = %w( sum cast abs pi count min max avg )
+
+    DIRECTIVES = %w( auto_increment unique default charset )
+
+    PREDEFINED_CONSTANTS = %w( null true false )
+
+    IDENT_KIND = CaseIgnoringWordList.new(:ident).
+      add(RESERVED_WORDS, :reserved).
+      add(PREDEFINED_TYPES, :pre_type).
+      add(PREDEFINED_CONSTANTS, :pre_constant).
+      add(PREDEFINED_FUNCTIONS, :predefined).
+      add(DIRECTIVES, :directive)
+
+    ESCAPE = / [bfnrtv\n\\\/'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} | . /mx
+    UNICODE_ESCAPE =  / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x
+
+    STRING_PREFIXES = /[xnb]|_\w+/i
+
+    # Tokenizes the input, appending [text, kind] pairs to +tokens+.
+    # A quoted string is wrapped in [:open, :string] ... [:close, :string].
+    # Returns +tokens+.
+    def scan_tokens tokens, options
+
+      state = :initial
+      string_type = nil
+      string_content = ''
+
+      until eos?
+
+        kind = nil
+        match = nil
+
+        if state == :initial
+
+          if scan(/ \s+ | \\\n /x)
+            kind = :space
+
+          elsif scan(/(?:--\s?|#).*/)
+            # Not anchored with ^, so a comment after a statement is found too.
+            kind = :comment
+
+          elsif scan(%r! /\* (?: .*? \*/ | .* ) !mx)
+            kind = :comment
+
+          elsif scan(/ [-+*\/=<>;,!&^|()\[\]{}~%] | \.(?!\d) /x)
+            kind = :operator
+
+          elsif scan(/(#{STRING_PREFIXES})?([`"'])/o)
+            prefix = self[1]
+            string_type = self[2]
+            tokens << [:open, :string]
+            tokens << [prefix, :modifier] if prefix
+            match = string_type
+            state = :string
+            kind = :delimiter
+
+          elsif match = scan(/ @? [A-Za-z_][A-Za-z_0-9]* /x)
+            kind = match[0] == ?@ ? :variable : IDENT_KIND[match.downcase]
+
+          elsif scan(/0[xX][0-9A-Fa-f]+/)
+            kind = :hex
+
+          elsif scan(/0[0-7]+(?![89.eEfF])/)
+            kind = :oct
+
+          elsif scan(/(?>\d+)(?![.eEfF])/)
+            kind = :integer
+
+          elsif scan(/\d[fF]|\d*\.\d+(?:[eE][+-]?\d+)?|\d+[eE][+-]?\d+/)
+            kind = :float
+
+          else
+            getch
+            kind = :error
+
+          end
+
+        elsif state == :string
+          if match = scan(/[^\\"'`]+/)
+            string_content << match
+            next
+          elsif match = scan(/["'`]/)
+            if string_type == match
+              if peek(1) == string_type  # doubling means escape
+                string_content << string_type << getch
+                next
+              end
+              unless string_content.empty?
+                tokens << [string_content, :content]
+                string_content = ''
+              end
+              tokens << [matched, :delimiter]
+              tokens << [:close, :string]
+              state = :initial
+              string_type = nil
+              next
+            else
+              string_content << match
+            end
+            next
+          elsif scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox)
+            unless string_content.empty?
+              tokens << [string_content, :content]
+              string_content = ''
+            end
+            kind = :char
+          elsif match = scan(/ \\ . /mox)
+            string_content << match
+            next
+          elsif match = scan(/ \\ | $ /x)
+            unless string_content.empty?
+              tokens << [string_content, :content]
+              string_content = ''
+            end
+            # Broken string: report the stray text, then close the group
+            # opened by [:open, :string] so the token stream stays balanced.
+            tokens << [match, :error] unless match.empty?
+            tokens << [:close, :string]
+            state = :initial
+            string_type = nil
+            next
+          else
+            raise_inspect "else case \" reached; %p not handled." % peek(1), tokens, state
+          end
+
+        else
+          raise_inspect 'else-case reached', tokens, state
+
+        end
+
+        match ||= matched
+#        raise [match, kind], tokens if kind == :error
+
+        tokens << [match, kind]
+
+      end
+
+      # Input ended inside a string: keep its content and close the group.
+      if state == :string
+        tokens << [string_content, :content] unless string_content.empty?
+        tokens << [:close, :string]
+      end
+
+#      RAILS_DEFAULT_LOGGER.info tokens.inspect
+      tokens
+
+    end
+
+  end
+
+end end
\ No newline at end of file
