summaryrefslogtreecommitdiff
path: root/storage/mroonga/vendor/groonga/lib/mrb/scripts/scan_info_builder.rb
diff options
context:
space:
mode:
Diffstat (limited to 'storage/mroonga/vendor/groonga/lib/mrb/scripts/scan_info_builder.rb')
-rw-r--r--storage/mroonga/vendor/groonga/lib/mrb/scripts/scan_info_builder.rb350
1 files changed, 281 insertions, 69 deletions
diff --git a/storage/mroonga/vendor/groonga/lib/mrb/scripts/scan_info_builder.rb b/storage/mroonga/vendor/groonga/lib/mrb/scripts/scan_info_builder.rb
index dc003f88948..66dad9ea403 100644
--- a/storage/mroonga/vendor/groonga/lib/mrb/scripts/scan_info_builder.rb
+++ b/storage/mroonga/vendor/groonga/lib/mrb/scripts/scan_info_builder.rb
@@ -2,19 +2,13 @@ require "scan_info_data"
module Groonga
class ScanInfoBuilder
- module Status
- START = 0
- VAR = 1
- COL1 = 2
- COL2 = 3
- CONST = 4
- end
-
- def initialize(expression, operator, size)
+ def initialize(expression, operator, record_exist)
@data_list = []
@expression = expression
@operator = operator
- @size = size
+ @record_exist = record_exist
+ @variable = @expression[0]
+ @table = Context.instance[@variable.domain]
end
RELATION_OPERATORS = [
@@ -35,6 +29,7 @@ module Groonga
Operator::GEO_WITHINP8,
Operator::TERM_EXTRACT,
Operator::REGEXP,
+ Operator::FUZZY,
]
ARITHMETIC_OPERATORS = [
@@ -60,67 +55,111 @@ module Groonga
def build
return nil unless valid?
- status = Status::START
- variable = @expression.get_var_by_offset(0)
- data = nil
- codes = @expression.codes
- n_codes = codes.size
- codes.each_with_index do |code, i|
- case code.op
+ context = BuildContext.new(@expression)
+ codes = context.codes
+ while context.have_next?
+ code = context.code
+ code_op = context.code_op
+ i = context.i
+ context.next
+
+ case code_op
when *RELATION_OPERATORS
- status = Status::START
- data.op = code.op
+ context.status = :start
+ data = context.data
+ data.op = code_op
data.end = i
+ data.weight = code.value.value if code.value
data.match_resolve_index
@data_list << data
- data = nil
+ context.data = nil
when *LOGICAL_OPERATORS
- put_logical_op(code.op, i)
+ if context.status == :const
+ data = context.data
+ data.op = Operator::PUSH
+ data.end = data.start
+ @data_list << data
+ context.data = nil
+ end
+ put_logical_op(code_op, i)
# TODO: rescue and return nil
- status = Status::START
+ context.status = :start
when Operator::PUSH
- data ||= ScanInfoData.new(i)
- if code.value == variable
- status = Status::VAR
+ context.data ||= ScanInfoData.new(i)
+ data = context.data
+ if code.value == @variable
+ context.status = :var
else
data.args << code.value
- if status == Status::START
+ if context.status == :start
data.flags |= ScanInfo::Flags::PRE_CONST
end
- status = Status::CONST
+ context.status = :const
+ end
+ if code.modify > 0 and
+ LOGICAL_OPERATORS.include?(codes[i + code.modify].op)
+ data.op = Operator::PUSH
+ data.end = data.start
+ @data_list << data
+ context.data = nil
+ context.status = :start
end
when Operator::GET_VALUE
- case status
- when Status::START
- data ||= ScanInfoData.new(i)
- status = Status::COL1
+ case context.status
+ when :start
+ context.data ||= ScanInfoData.new(i)
+ data = context.data
+ context.status = :column1
data.args << code.value
- when Status::CONST, Status::VAR
- status = Status::COL1
+ when :const, :var
+ context.status = :column1
data.args << code.value
- when Status::COL1
- raise ErrorMessage, "invalid expression: can't use column as a value: <#{code.value.name}>: <#{@expression.grn_inspect}>"
- status = Status::COL2
- when Status::COL2
+ when :column1
+ message = "invalid expression: can't use column as a value: "
+ message << "<#{code.value.name}>: <#{@expression.grn_inspect}>"
+ raise ErrorMessage, message
+ when :column2
# Do nothing
+ else
+ message = "internal expression parsing error: unknown status: "
+ message << "<#{context.status.inspect}>: "
+ message << "<#{@expression.grn_inspect}>"
+ raise ErrorMessage, message
end
when Operator::CALL
- data ||= ScanInfoData.new(i)
+ context.data ||= ScanInfoData.new(i)
+ data = context.data
if (code.flags & ExpressionCode::Flags::RELATIONAL_EXPRESSION) != 0 or
- (i + 1) == n_codes
- status = Status::START
- data.op = code.op
+ (not context.have_next?)
+ context.status = :start
+ data.op = code_op
data.end = i
data.call_relational_resolve_indexes
@data_list << data
- data = nil
+ context.data = nil
else
- status = Status::COL2
+ context.status = :column2
end
+ when Operator::GET_REF
+ context.data ||= ScanInfoData.new(i)
+ case context.status
+ when :start
+ data = context.data
+ context.status = :column1
+ data.args << code.value
+ end
+ when Operator::GET_MEMBER
+ data = context.data
+ index = data.args.pop
+ data.start_position = index.value
+ context.status = :column1
+ when Operator::NOT
+ success = build_not(context, code, i)
+ return nil unless success
end
end
- if @operator == Operator::OR and @size == 0
+ if @operator == Operator::OR and !@record_exist
first_data = @data_list.first
if (first_data.flags & ScanInfo::Flags::PUSH) == 0 or
first_data.logical_op != @operator
@@ -130,7 +169,7 @@ module Groonga
first_data.logical_op = @operator
end
else
- put_logical_op(@operator, n_codes)
+ put_logical_op(@operator, context.n_codes)
end
optimize
@@ -140,38 +179,54 @@ module Groonga
def valid?
n_relation_expressions = 0
n_logical_expressions = 0
- status = Status::START
- variable = @expression.get_var_by_offset(0)
+ status = :start
codes = @expression.codes
- codes.each do |code|
+ codes.each_with_index do |code, i|
case code.op
when *RELATION_OPERATORS
- return false if status < Status::COL1
- return false if status > Status::CONST
- status = Status::START
+ if status == :start || status == :var
+ return false
+ end
+ status = :start
n_relation_expressions += 1
when *ARITHMETIC_OPERATORS
- return false if status < Status::COL1
- return false if status > Status::CONST
- status = Status::START
+ if status == :start || status == :var
+ return false
+ end
+ status = :start
return false if n_relation_expressions != (n_logical_expressions + 1)
when *LOGICAL_OPERATORS
- return false if status != Status::START
- n_logical_expressions += 1
- return false if n_logical_expressions >= n_relation_expressions
+ case status
+ when :start
+ n_logical_expressions += 1
+ return false if n_logical_expressions >= n_relation_expressions
+ when :const
+ n_logical_expressions += 1
+ n_relation_expressions += 1
+ return false if n_logical_expressions >= n_relation_expressions
+ status = :start
+ else
+ return false
+ end
when Operator::PUSH
- if code.value == variable
- status = Status::VAR
+ if code.modify > 0 and
+ LOGICAL_OPERATORS.include?(codes[i + code.modify].op)
+ n_relation_expressions += 1
+ status = :start
else
- status = Status::CONST
+ if code.value == @variable
+ status = :var
+ else
+ status = :const
+ end
end
when Operator::GET_VALUE
case status
- when Status::START, Status::CONST, Status::VAR
- status = Status::COL1
- when Status::COL1
- status = Status::COL2
- when Status::COL2
+ when :start, :const, :var
+ status = :column1
+ when :column1
+ status = :column2
+ when :column2
# Do nothing
else
return false
@@ -179,17 +234,34 @@ module Groonga
when Operator::CALL
if (code.flags & ExpressionCode::Flags::RELATIONAL_EXPRESSION) != 0 or
code == codes.last
- status = Status::START
+ status = :start
n_relation_expressions += 1
else
- status = Status::COL2
+ status = :column2
+ end
+ when Operator::GET_REF
+ case status
+ when :start
+ status = :column1
+ else
+ return false
end
+ when Operator::GET_MEMBER
+ case status
+ when :const
+ return false unless codes[i - 1].value.value.is_a?(Integer)
+ status = :column1
+ else
+ return false
+ end
+ when Operator::NOT
+ # Do nothing
else
return false
end
end
- return false if status != Status::START
+ return false if status != :start
return false if n_relation_expressions != (n_logical_expressions + 1)
true
@@ -258,6 +330,61 @@ module Groonga
end
end
+ def build_not(context, code, i)
+ last_data = @data_list.last
+ return false if last_data.nil?
+
+ case last_data.op
+ when Operator::LESS
+ last_data.op = Operator::GREATER_EQUAL
+ last_data.end += 1
+ when Operator::LESS_EQUAL
+ last_data.op = Operator::GREATER
+ last_data.end += 1
+ when Operator::GREATER
+ last_data.op = Operator::LESS_EQUAL
+ last_data.end += 1
+ when Operator::GREATER_EQUAL
+ last_data.op = Operator::LESS
+ last_data.end += 1
+ when Operator::NOT_EQUAL
+ last_data.op = Operator::EQUAL
+ last_data.end += 1
+ else
+ if @data_list.size == 1
+ if last_data.search_indexes.empty?
+ if last_data.op == Operator::EQUAL
+ last_data.op = Operator::NOT_EQUAL
+ last_data.end += 1
+ else
+ return false
+ end
+ else
+ last_data.logical_op = Operator::AND_NOT
+ last_data.flags &= ~ScanInfo::Flags::PUSH
+ @data_list.unshift(create_all_match_data)
+ end
+ else
+ next_code = context.code
+ return false if next_code.nil?
+
+ case next_code.op
+ when Operator::AND
+ context.code_op = Operator::AND_NOT
+ when Operator::AND_NOT
+ context.code_op = Operator::AND
+ when Operator::OR
+ @data_list[-1, 0] = create_all_match_data
+ put_logical_op(Operator::AND_NOT, i)
+ else
+ return false
+ end
+ end
+ end
+
+ true
+ end
+
def optimize
optimized_data_list = []
i = 0
@@ -278,7 +405,45 @@ module Groonga
end
optimized_data_list << data
end
- optimized_data_list
+
+ optimize_by_estimated_size(optimized_data_list)
+ end
+
+ def optimize_by_estimated_size(data_list)
+ return data_list unless Groonga::ORDER_BY_ESTIMATED_SIZE
+
+ start_index = nil
+ data_list.size.times do |i|
+ data = data_list[i]
+ if data.logical_op != Operator::AND
+ if start_index.nil?
+ start_index = i
+ else
+ sort_by_estimated_size!(data_list, start_index...i)
+ start_index = nil
+ end
+ end
+ end
+ unless start_index.nil?
+ sort_by_estimated_size!(data_list, start_index...data_list.size)
+ end
+ data_list
+ end
+
+ def sort_by_estimated_size!(data_list, range)
+ target_data_list = data_list[range]
+ return if target_data_list.size < 2
+
+ start_logical_op = target_data_list.first.logical_op
+ sorted_data_list = target_data_list.sort_by do |data|
+ estimator = ScanInfoDataSizeEstimator.new(data, @table)
+ estimator.estimate
+ end
+ sorted_data_list.each do |sorted_data|
+ sorted_data.logical_op = Operator::AND
+ end
+ sorted_data_list.first.logical_op = start_logical_op
+ data_list[range] = sorted_data_list
end
def range_operations?(data, next_data)
@@ -314,6 +479,17 @@ module Groonga
end
end
+ def create_all_match_data
+ data = ScanInfoData.new(0)
+ data.end = 0
+ data.flags = ScanInfo::Flags::PUSH
+ data.op = Operator::CALL
+ data.logical_op = Operator::OR
+ data.args = [Context.instance["all_records"]]
+ data.search_indexes = []
+ data
+ end
+
def create_between_data(data, next_data)
between_data = ScanInfoData.new(data.start)
between_data.end = next_data.end + 1
@@ -361,5 +537,41 @@ module Groonga
@expression.allocate_constant(max_border),
]
end
+
+ class BuildContext
+ attr_accessor :status
+ attr_reader :codes
+ attr_reader :n_codes
+ attr_reader :i
+ attr_writer :code_op
+ attr_accessor :data
+ def initialize(expression)
+ @expression = expression
+ @status = :start
+ @current_data = nil
+ @codes = @expression.codes
+ @n_codes = @codes.size
+ @i = 0
+ @code_op = nil
+ @data = nil
+ end
+
+ def have_next?
+ @i < @n_codes
+ end
+
+ def next
+ @i += 1
+ @code_op = nil
+ end
+
+ def code
+ @codes[@i]
+ end
+
+ def code_op
+ @code_op || code.op
+ end
+ end
end
end