require 'chef_zero/solr/query/binary_operator'
require 'chef_zero/solr/query/unary_operator'
require 'chef_zero/solr/query/term'
require 'chef_zero/solr/query/phrase'
require 'chef_zero/solr/query/range_query'
require 'chef_zero/solr/query/subquery'

module ChefZero
  module Solr
    # Hand-written tokenizer and recursive-descent parser for Solr/Lucene-style
    # query strings.
    #
    # #parse turns the query string given to the constructor into a tree made
    # of Query::Term, Query::Phrase, Query::RangeQuery, Query::Subquery,
    # Query::UnaryOperator and Query::BinaryOperator nodes.  Any syntax error
    # is reported by raising a RuntimeError (see #parse_error).
    class SolrParser
      DEFAULT_FIELD = 'text'.freeze

      # Characters that always form a one-character operator token.
      SINGLE_CHAR_OPERATORS = ['"', '+', '-', '!', '(', ')', '{', '}', '[', ']', '^', ':'].freeze

      # Two-character operator tokens.
      DOUBLE_CHAR_OPERATORS = ['&&', '||'].freeze

      # Symbolic spellings recognised by the tokenizer, mapped to the word
      # operators the parser understands.
      OPERATOR_ALIASES = { '&&' => 'AND', '||' => 'OR', '!' => 'NOT' }.freeze

      # Prefix operators.
      UNARY_OPERATORS = ['NOT', '+', '-'].freeze

      # Infix operators and their binding strength (higher binds tighter).
      BINARY_OPERATOR_PRECEDENCE = { '^' => 4, ':' => 3, 'AND' => 2, 'OR' => 1 }.freeze

      # query_string - the String to parse; parsing starts at its first character.
      def initialize(query_string)
        @query_string = query_string
        @index = 0
      end

      # Parses the query string and returns the root node of the parse tree.
      # Raises RuntimeError on malformed input.
      def parse
        read_expression
      end

      #
      # Tokenization
      #

      # Returns the next token without consuming it (nil at end of input).
      # The token is cached until #next_token consumes it.
      def peek_token
        @next_token ||= parse_token
      end

      # Consumes and returns the next token (nil at end of input).
      def next_token
        result = peek_token
        @next_token = nil
        result
      end

      # Reads one token starting at the current position, advancing @index
      # past it.  Returns an operator String, a term String, or nil at end of
      # input.
      def parse_token
        skip_whitespace
        return nil if eof?

        operator = peek_operator_token
        if operator
          @index += operator.length
          return operator
        end

        # Everything that isn't whitespace or an operator is part of a term
        # (characters plus backslash-escaped characters).  The backslash is
        # kept in the returned token.
        start_index = @index
        loop do
          @index += 1 if @query_string[@index] == '\\'
          @index += 1 unless eof?
          break if eof? || !peek_term_token
        end
        @query_string[start_index...@index]
      end

      # Advances @index past any run of whitespace at the current position.
      def skip_whitespace
        @index += 1 while @query_string[@index] =~ /\s/
      end

      # Truthy when the character at the current position may continue a term:
      # it is neither whitespace nor the start of an operator.  A '-' is
      # allowed inside a term (e.g. "foo-bar").
      def peek_term_token
        return nil if @query_string[@index] =~ /\s/

        operator = peek_operator_token
        !operator || operator == '-'
      end

      # Returns the operator token starting at the current position, or nil if
      # the input there does not start an operator.  Does not advance @index.
      def peek_operator_token
        char = @query_string[@index]
        return char if SINGLE_CHAR_OPERATORS.include?(char)

        pair = @query_string[@index, 2]
        DOUBLE_CHAR_OPERATORS.include?(pair) ? pair : nil
      end

      # True when there is no cached token and the whole string has been read.
      def eof?
        !@next_token && @index >= @query_string.length
      end

      #
      # Parse tree creation
      #

      # Reads a sequence of expressions joined by binary operators and returns
      # the resulting tree.  Stops (without consuming) at a ')' or at end of
      # input.
      def read_expression
        result = read_single_expression
        # peek_token has the side effect of skipping whitespace for us, so we
        # really know if we're at eof or not.
        until peek_token == ')' || eof?
          operator = normalize_operator(peek_token)
          if binary_operator?(operator)
            next_token
          else
            # If 2 terms are next to each other, the default operator is OR
            operator = 'OR'
          end
          result = append_operand(result, operator, read_single_expression)
        end
        result
      end

      # Raises a RuntimeError describing a syntax problem at the current
      # position.
      #
      # token - the offending token (may be nil at end of input).
      # str   - description of what was expected.
      def parse_error(token, str)
        raise "Error on token '#{token}' at #{@index} of '#{@query_string}': #{str}"
      end

      # Reads exactly one operand: a unary-operator application, a phrase, a
      # range query, a parenthesised subquery, or a term (optionally followed
      # by ':' and a value, which yields a ':' BinaryOperator).
      def read_single_expression
        token = next_token
        # If EOF, we have a problem Houston
        parse_error(nil, "Expected expression!") unless token

        operator = normalize_operator(token)
        if unary_operator?(operator)
          # TODO We rely on all unary operators having higher precedence than
          # all binary operators.  Check if this is the case.
          Query::UnaryOperator.new(operator, read_single_expression)
        elsif token == '"'
          read_phrase
        elsif token == '{' || token == '['
          read_range_query(token)
        elsif token == '('
          read_subquery
        elsif ['}', ']', ')'].include?(token)
          parse_error(token, "Unexpected end paren")
        elsif binary_operator?(operator)
          parse_error(token, "Unexpected binary operator")
        else
          read_term(token)
        end
      end

      # True when token is a prefix operator.
      def unary_operator?(token)
        UNARY_OPERATORS.include?(token)
      end

      # True when token is an infix operator.
      def binary_operator?(token)
        BINARY_OPERATOR_PRECEDENCE.key?(token)
      end

      # Returns the binding strength of a binary operator (higher binds
      # tighter), or nil when token is not a binary operator.
      def binary_operator_precedence(token)
        BINARY_OPERATOR_PRECEDENCE[token]
      end

      private

      # Maps the symbolic operators '&&', '||' and '!' to 'AND', 'OR' and
      # 'NOT'; any other token is returned unchanged.
      def normalize_operator(token)
        OPERATOR_ALIASES.fetch(token, token)
      end

      # Combines tree and operand with operator, honouring precedence.  While
      # the new operator binds tighter than the operator at the current node,
      # the operand is pushed down that node's right-hand side:
      #   a+b*c -> a+(b*c)      a*b+c -> (a*b)+c
      # Parenthesised groups are Query::Subquery nodes and are never split.
      def append_operand(tree, operator, operand)
        if tree.is_a?(Query::BinaryOperator) &&
           binary_operator_precedence(operator) > binary_operator_precedence(tree.operator)
          new_right = append_operand(tree.right, operator, operand)
          Query::BinaryOperator.new(tree.left, tree.operator, new_right)
        else
          Query::BinaryOperator.new(tree, operator, operand)
        end
      end

      # Reads the tokens of a phrase up to the closing '"' (the opening quote
      # has already been consumed).  Raises if the input ends first.
      def read_phrase
        phrase_terms = []
        until (term = next_token) == '"'
          # Without this check an unterminated phrase would loop forever,
          # because next_token keeps returning nil at end of input.
          parse_error(term, "Expected closing '\"' for phrase") if term.nil?
          phrase_terms << Query::Term.new(term)
        end
        Query::Phrase.new(phrase_terms)
      end

      # Reads "left TO right" followed by '}' or ']'.
      #
      # open_token - the already-consumed opening token; '[' makes the lower
      #              bound inclusive, and a closing ']' makes the upper bound
      #              inclusive.
      def read_range_query(open_token)
        left = next_token
        parse_error(left, "Expected left term in range query") unless left
        to = next_token
        parse_error(to, "Expected TO in range query") if to != "TO"
        right = next_token
        parse_error(right, "Expected right term in range query") unless right
        end_range = next_token
        unless ['}', ']'].include?(end_range)
          parse_error(end_range, "Expected end of range ('}' or ']')")
        end
        Query::RangeQuery.new(left, right, open_token == '[', end_range == ']')
      end

      # Reads a parenthesised expression (the '(' has already been consumed)
      # and requires the matching ')'.
      def read_subquery
        subquery = read_expression
        close_paren = next_token
        parse_error(close_paren, "Expected ')'") if close_paren != ')'
        Query::Subquery.new(subquery)
      end

      # Wraps token in a Query::Term.  When the next token is ':', the term is
      # used as the left side of a ':' BinaryOperator whose right side is the
      # following single expression.
      def read_term(token)
        term = Query::Term.new(token)
        return term unless peek_token == ':'

        Query::BinaryOperator.new(term, next_token, read_single_expression)
      end
    end
  end
end