1 files changed, 194 insertions, 0 deletions
diff --git a/lib/chef_zero/solr/solr_parser.rb b/lib/chef_zero/solr/solr_parser.rb
new file mode 100644
index 0000000..589b78f
--- /dev/null
+++ b/lib/chef_zero/solr/solr_parser.rb
@@ -0,0 +1,267 @@
+require 'chef_zero/solr/query/binary_operator'
+require 'chef_zero/solr/query/unary_operator'
+require 'chef_zero/solr/query/term'
+require 'chef_zero/solr/query/phrase'
+require 'chef_zero/solr/query/range_query'
+require 'chef_zero/solr/query/subquery'
+
+module ChefZero
+  module Solr
+    # Tokenizer and parser for Solr-style query strings.  #parse turns the
+    # string given to the constructor into a tree of Query::BinaryOperator,
+    # Query::UnaryOperator, Query::Term, Query::Phrase, Query::RangeQuery and
+    # Query::Subquery nodes.  Malformed input is reported by raising a
+    # RuntimeError (see #parse_error).
+    class SolrParser
+      DEFAULT_FIELD = 'text'
+
+      # Characters that always form a one-character token when a token starts.
+      SINGLE_CHAR_OPERATORS = ['"', '+', '-', '!', '(', ')', '{', '}', '[', ']', '^', ':'].freeze
+      # Two-character operator tokens.
+      DOUBLE_CHAR_OPERATORS = ['&&', '||'].freeze
+      # '+' and '-' are operators only at the start of a token; inside a term
+      # such as web-01 or 2012-01-01 they are ordinary characters.
+      TERM_INTERNAL_OPERATORS = ['+', '-'].freeze
+      # Symbolic spellings mapped to the keyword operator each one stands for.
+      OPERATOR_ALIASES = { '&&' => 'AND', '||' => 'OR', '!' => 'NOT' }.freeze
+      UNARY_OPERATORS = ['NOT', '+', '-'].freeze
+      # Binary operators mapped to binding strength (higher binds tighter).
+      BINARY_OPERATOR_PRECEDENCE = { '^' => 4, ':' => 3, 'AND' => 2, 'OR' => 1 }.freeze
+      RANGE_END_TOKENS = ['}', ']'].freeze
+      CLOSING_TOKENS = ['}', ']', ')'].freeze
+
+      # query_string - the query text to parse.
+      def initialize(query_string)
+        @query_string = query_string
+        @index = 0
+        # Single token of lookahead, filled in by #peek_token.
+        @next_token = nil
+      end
+
+      # Parses the query string and returns the root node of the query tree.
+      # Raises RuntimeError when the query is malformed, including when input
+      # is left over after the expression (an unmatched ')').
+      def parse
+        result = read_expression
+        # read_expression stops only at end of input or at a ')'; at the top
+        # level there is no open paren for that ')' to close.
+        parse_error(peek_token, "Unexpected ')'") unless eof?
+        result
+      end
+
+      #
+      # Tokenization
+      #
+
+      # Returns the next token without consuming it (nil at end of input).
+      def peek_token
+        @next_token ||= parse_token
+      end
+
+      # Consumes and returns the next token (nil at end of input).
+      def next_token
+        result = peek_token
+        @next_token = nil
+        result
+      end
+
+      # Scans one token at the current position and advances past it.  A token
+      # is either an operator (see #peek_operator_token) or a term: a run of
+      # characters up to the next whitespace or operator.  A backslash keeps
+      # the character after it in the term, and '+' / '-' do not end a term
+      # they appear inside.  Returns nil at end of input.
+      def parse_token
+        skip_whitespace
+        return nil if end_of_input?
+
+        operator = peek_operator_token
+        if operator
+          @index += operator.length
+          return operator
+        end
+
+        start_index = @index
+        loop do
+          # Step over a backslash so the escaped character is consumed below
+          # without being tested as whitespace or an operator.
+          @index += 1 if @query_string[@index] == '\\'
+          @index += 1 unless end_of_input?
+          break if end_of_input? || term_boundary?
+        end
+        @query_string[start_index...@index]
+      end
+
+      # Advances the position past any run of whitespace.
+      def skip_whitespace
+        @index += 1 while @query_string[@index] =~ /\s/
+      end
+
+      # Returns the operator token that starts at the current position, or nil
+      # when the text there is not an operator.  Does not advance.
+      def peek_operator_token
+        char = @query_string[@index]
+        return char if SINGLE_CHAR_OPERATORS.include?(char)
+
+        pair = @query_string[@index, 2]
+        DOUBLE_CHAR_OPERATORS.include?(pair) ? pair : nil
+      end
+
+      # True when no lookahead token is pending and the position is at or past
+      # the end of the query string.
+      def eof?
+        !@next_token && end_of_input?
+      end
+
+      #
+      # Parse tree creation
+      #
+
+      # Reads expressions joined by binary operators until a ')' or end of
+      # input.  Two expressions with no operator between them are joined with
+      # OR.  Returns the root node of what was read.
+      def read_expression
+        result = read_single_expression
+        # peek_token skips whitespace as a side effect, so eof? is accurate
+        # even when only trailing whitespace remains.
+        until peek_token == ')' || eof?
+          operator = canonical_operator(peek_token)
+          if binary_operator?(operator)
+            next_token
+          else
+            # The peeked token starts the next expression; leave it unread.
+            operator = 'OR'
+          end
+          result = attach_operand(result, operator, read_single_expression)
+        end
+        result
+      end
+
+      # Raises a RuntimeError describing the problem str found at token.
+      def parse_error(token, str)
+        raise "Error on token '#{token}' at #{@index} of '#{@query_string}': #{str}"
+      end
+
+      # Reads one operand: a term, a phrase, a range query, a parenthesized
+      # subquery, or a unary operator applied to the operand that follows it.
+      # Raises RuntimeError at end of input or on a token that cannot start an
+      # expression.
+      def read_single_expression
+        token = next_token
+        parse_error(nil, "Expected expression!") unless token
+
+        operator = canonical_operator(token)
+        if unary_operator?(operator)
+          # TODO We rely on all unary operators having higher precedence than all
+          # binary operators.  Check if this is the case.
+          Query::UnaryOperator.new(operator, read_single_expression)
+        elsif token == '"'
+          read_phrase
+        elsif token == '{' || token == '['
+          read_range_query(token)
+        elsif token == '('
+          read_subquery
+        elsif CLOSING_TOKENS.include?(token)
+          parse_error(token, "Unexpected end paren")
+        elsif binary_operator?(operator)
+          parse_error(token, "Unexpected binary operator")
+        else
+          Query::Term.new(token)
+        end
+      end
+
+      # True for the keyword or sign that introduces a unary operator.
+      def unary_operator?(token)
+        UNARY_OPERATORS.include?(token)
+      end
+
+      # True for a token that joins two expressions.
+      def binary_operator?(token)
+        BINARY_OPERATOR_PRECEDENCE.key?(token)
+      end
+
+      # Returns the binding strength of a binary operator, nil for any other
+      # token.
+      def binary_operator_precedence(token)
+        BINARY_OPERATOR_PRECEDENCE[token]
+      end
+
+      private
+
+      # True when the scan position is at or past the end of the query string,
+      # regardless of any pending lookahead token.
+      def end_of_input?
+        @index >= @query_string.length
+      end
+
+      # True when the character at the current position ends the term being
+      # scanned: whitespace, or any operator other than '+' and '-'.
+      def term_boundary?
+        return true if @query_string[@index] =~ /\s/
+
+        operator = peek_operator_token
+        !(operator.nil? || TERM_INTERNAL_OPERATORS.include?(operator))
+      end
+
+      # Maps '&&', '||' and '!' to 'AND', 'OR' and 'NOT'; returns any other
+      # token unchanged.
+      def canonical_operator(token)
+        OPERATOR_ALIASES.fetch(token, token)
+      end
+
+      # Combines left and right with operator, honoring precedence.  When
+      # operator binds tighter than the operator at the root of left, the new
+      # node is pushed down left's right-hand side, however deep that goes:
+      #   a OR b AND c, then ':' d  ->  a OR (b AND (c:d))
+      # Otherwise left becomes the left operand:
+      #   a AND b, then OR c  ->  (a AND b) OR c
+      def attach_operand(left, operator, right)
+        if left.is_a?(Query::BinaryOperator) &&
+           binary_operator_precedence(operator) > binary_operator_precedence(left.operator)
+          new_right = attach_operand(left.right, operator, right)
+          Query::BinaryOperator.new(left.left, left.operator, new_right)
+        else
+          Query::BinaryOperator.new(left, operator, right)
+        end
+      end
+
+      # Reads the terms of a phrase; the opening '"' has already been consumed.
+      # Raises RuntimeError if the input ends before the closing '"'.
+      def read_phrase
+        phrase_terms = []
+        loop do
+          term = next_token
+          parse_error(term, "Expected closing '\"' for phrase") if term.nil?
+          break if term == '"'
+          phrase_terms << Query::Term.new(term)
+        end
+        Query::Phrase.new(phrase_terms)
+      end
+
+      # Reads "left TO right" and the closing bracket of a range query whose
+      # opening bracket (open_token) has already been consumed.  Either '}' or
+      # ']' may close the range; which brackets were used is passed to RangeQuery.
+      def read_range_query(open_token)
+        left = next_token
+        parse_error(left, "Expected left term in range query") unless left
+        to = next_token
+        parse_error(to, "Expected TO in range query") if to != "TO"
+        right = next_token
+        parse_error(right, "Expected right term in range query") unless right
+        end_range = next_token
+        unless RANGE_END_TOKENS.include?(end_range)
+          parse_error(end_range, "Expected end range '}' or ']'")
+        end
+        Query::RangeQuery.new(left, right, open_token == '[', end_range == ']')
+      end
+
+      # Reads the expression inside parentheses and its closing ')'; the
+      # opening '(' has already been consumed.
+      def read_subquery
+        subquery = read_expression
+        close_paren = next_token
+        parse_error(close_paren, "Expected ')'") if close_paren != ')'
+        Query::Subquery.new(subquery)
+      end
+    end
+  end
+end