summaryrefslogtreecommitdiff
path: root/storage/mroonga/vendor/groonga/plugins/sharding/logical_range_filter.rb
diff options
context:
space:
mode:
Diffstat (limited to 'storage/mroonga/vendor/groonga/plugins/sharding/logical_range_filter.rb')
-rw-r--r--storage/mroonga/vendor/groonga/plugins/sharding/logical_range_filter.rb364
1 files changed, 282 insertions, 82 deletions
diff --git a/storage/mroonga/vendor/groonga/plugins/sharding/logical_range_filter.rb b/storage/mroonga/vendor/groonga/plugins/sharding/logical_range_filter.rb
index 94ae8600d2b..1c8f8644695 100644
--- a/storage/mroonga/vendor/groonga/plugins/sharding/logical_range_filter.rb
+++ b/storage/mroonga/vendor/groonga/plugins/sharding/logical_range_filter.rb
@@ -14,6 +14,7 @@ module Groonga
"offset",
"limit",
"output_columns",
+ "use_range_index",
])
def run_body(input)
@@ -55,7 +56,26 @@ module Groonga
end
end
+ private
+ def cache_key(input)
+ key = "logical_range_filter\0"
+ key << "#{input[:logical_table]}\0"
+ key << "#{input[:shard_key]}\0"
+ key << "#{input[:min]}\0"
+ key << "#{input[:min_border]}\0"
+ key << "#{input[:max]}\0"
+ key << "#{input[:max_border]}\0"
+ key << "#{input[:order]}\0"
+ key << "#{input[:filter]}\0"
+ key << "#{input[:offset]}\0"
+ key << "#{input[:limit]}\0"
+ key << "#{input[:output_columns]}\0"
+ key << "#{input[:use_range_index]}\0"
+ key
+ end
+
class ExecuteContext
+ attr_reader :use_range_index
attr_reader :enumerator
attr_reader :order
attr_reader :filter
@@ -68,6 +88,7 @@ module Groonga
attr_reader :threshold
def initialize(input)
@input = input
+ @use_range_index = parse_use_range_index(@input[:use_range_index])
@enumerator = LogicalEnumerator.new("logical_range_filter", @input)
@order = parse_order(@input, :order)
@filter = @input[:filter]
@@ -93,6 +114,17 @@ module Groonga
end
private
+ def parse_use_range_index(use_range_index)
+ case use_range_index
+ when "yes"
+ true
+ when "no"
+ false
+ else
+ nil
+ end
+ end
+
def parse_order(input, name)
order = input[name]
return :ascending if order.nil?
@@ -123,23 +155,21 @@ module Groonga
end
def execute
- first_table = nil
+ first_shard = nil
enumerator = @context.enumerator
+ target_range = enumerator.target_range
if @context.order == :descending
each_method = :reverse_each
else
each_method = :each
end
- enumerator.send(each_method) do |table, shard_key, shard_range|
- first_table ||= table
- next if table.empty?
-
- shard_executor = ShardExecutor.new(@context,
- table, shard_key, shard_range)
+ enumerator.send(each_method) do |shard, shard_range|
+ first_shard ||= shard
+ shard_executor = ShardExecutor.new(@context, shard, shard_range)
shard_executor.execute
break if @context.current_limit == 0
end
- if first_table.nil?
+ if first_shard.nil?
message =
"[logical_range_filter] no shard exists: " +
"logical_table: <#{enumerator.logical_table}>: " +
@@ -148,17 +178,16 @@ module Groonga
end
if @context.result_sets.empty?
result_set = HashTable.create(:flags => ObjectFlags::WITH_SUBREC,
- :key_type => first_table)
+ :key_type => first_shard.table)
@context.result_sets << result_set
end
end
end
class ShardExecutor
- def initialize(context, table, shard_key, shard_range)
+ def initialize(context, shard, shard_range)
@context = context
- @table = table
- @shard_key = shard_key
+ @shard = shard
@shard_range = shard_range
@filter = @context.filter
@@ -168,137 +197,279 @@ module Groonga
@target_range = @context.enumerator.target_range
@cover_type = @target_range.cover_type(@shard_range)
-
- @expression_builder = RangeExpressionBuilder.new(@shard_key,
- @target_range,
- @filter)
end
def execute
return if @cover_type == :none
+ return if @shard.table.empty?
+
+ shard_key = @shard.key
+ if shard_key.nil?
+ message = "[logical_range_filter] shard_key doesn't exist: " +
+ "<#{@shard.key_name}>"
+ raise InvalidArgument, message
+ end
+
+ expression_builder = RangeExpressionBuilder.new(shard_key,
+ @target_range)
+ expression_builder.filter = @filter
- index_info = @shard_key.find_index(Operator::LESS)
+ index_info = shard_key.find_index(Operator::LESS)
if index_info
range_index = index_info.index
- range_index = nil unless use_range_index?(range_index)
+ unless use_range_index?(range_index, expression_builder)
+ range_index = nil
+ end
else
range_index = nil
end
- case @cover_type
- when :all
- filter_shard_all(range_index)
- when :partial_min
- if range_index
- filter_by_range(range_index,
- @target_range.min, @target_range.min_border,
- nil, nil)
- else
- filter_table do |expression|
- @expression_builder.build_partial_min(expression)
- end
- end
- when :partial_max
- if range_index
- filter_by_range(range_index,
- nil, nil,
- @target_range.max, @target_range.max_border)
- else
- filter_table do |expression|
- @expression_builder.build_partial_max(expression)
- end
- end
- when :partial_min_and_max
- if range_index
- filter_by_range(range_index,
- @target_range.min, @target_range.min_border,
- @target_range.max, @target_range.max_border)
- else
- filter_table do |expression|
- @expression_builder.build_partial_min_and_max(expression)
- end
- end
- end
+ execute_filter(range_index, expression_builder)
end
private
- def use_range_index?(range_index)
+ def decide_use_range_index(use, reason, line, method)
+ message = "[logical_range_filter]"
+ if use
+ message << "[range-index] "
+ else
+ message << "[select] "
+ end
+ message << "<#{@shard.table_name}>: "
+ message << reason
+ Context.instance.logger.log(Logger::Level::DEBUG,
+ __FILE__,
+ line,
+ method.to_s,
+ message)
+
+ use
+ end
+
+ def use_range_index?(range_index, expression_builder)
+ use_range_index_parameter_message =
+ "force by use_range_index parameter"
+ case @context.use_range_index
+ when true
+ return decide_use_range_index(true,
+ use_range_index_parameter_message,
+ __LINE__, __method__)
+ when false
+ return decide_use_range_index(false,
+ use_range_index_parameter_message,
+ __LINE__, __method__)
+ end
+
+ range_index_logical_parameter_message =
+ "force by range_index logical parameter"
+ case Parameters.range_index
+ when :always
+ return decide_use_range_index(true,
+ range_index_logical_parameter_message,
+ __LINE__, __method__)
+ when :never
+ return decide_use_range_index(false,
+ range_index_logical_parameter_message,
+ __LINE__, __method__)
+ end
+
current_limit = @context.current_limit
if current_limit < 0
- return false
+ reason = "limit is negative: <#{current_limit}>"
+ return decide_use_range_index(false, reason,
+ __LINE__, __method__)
end
required_n_records = @context.current_offset + current_limit
- max_n_records = @table.size
+ max_n_records = @shard.table.size
if max_n_records <= required_n_records
- return false
+ reason = "the number of required records (#{required_n_records}) "
+ reason << ">= "
+ reason << "the number of records in shard (#{max_n_records})"
+ return decide_use_range_index(false, reason,
+ __LINE__, __method__)
end
threshold = @context.threshold
if threshold <= 0.0
- return true
+ reason = "threshold is negative: <#{threshold}>"
+ return decide_use_range_index(true, reason,
+ __LINE__, __method__)
end
if threshold >= 1.0
- return false
+ reason = "threshold (#{threshold}) >= 1.0"
+ return decide_use_range_index(false, reason,
+ __LINE__, __method__)
end
+ table = @shard.table
estimated_n_records = 0
case @cover_type
when :all
if @filter
- create_expression(@table) do |expression|
- @expression_builder.build_all(expression)
- estimated_n_records = expression.estimate_size(@table)
+ create_expression(table) do |expression|
+ expression_builder.build_all(expression)
+ unless range_index_available_expression?(expression,
+ __LINE__, __method__)
+ return false
+ end
+ estimated_n_records = expression.estimate_size(table)
end
else
estimated_n_records = max_n_records
end
when :partial_min
- create_expression(@table) do |expression|
- @expression_builder.build_partial_min(expression)
- estimated_n_records = expression.estimate_size(@table)
+ create_expression(table) do |expression|
+ expression_builder.build_partial_min(expression)
+ unless range_index_available_expression?(expression,
+ __LINE__, __method__)
+ return false
+ end
+ estimated_n_records = expression.estimate_size(table)
end
when :partial_max
- create_expression(@table) do |expression|
- @expression_builder.build_partial_max(expression)
- estimated_n_records = expression.estimate_size(@table)
+ create_expression(table) do |expression|
+ expression_builder.build_partial_max(expression)
+ unless range_index_available_expression?(expression,
+ __LINE__, __method__)
+ return false
+ end
+ estimated_n_records = expression.estimate_size(table)
end
when :partial_min_and_max
- create_expression(@table) do |expression|
- @expression_builder.build_partial_min_and_max(expression)
- estimated_n_records = expression.estimate_size(@table)
+ create_expression(table) do |expression|
+ expression_builder.build_partial_min_and_max(expression)
+ unless range_index_available_expression?(expression,
+ __LINE__, __method__)
+ return false
+ end
+ estimated_n_records = expression.estimate_size(table)
end
end
if estimated_n_records <= required_n_records
- return false
+ reason = "the number of required records (#{required_n_records}) "
+ reason << ">= "
+ reason << "the number of estimated records (#{estimated_n_records})"
+ return decide_use_range_index(false, reason,
+ __LINE__, __method__)
end
hit_ratio = estimated_n_records / max_n_records.to_f
- hit_ratio >= threshold
+ use_range_index_by_hit_ratio = (hit_ratio >= threshold)
+ if use_range_index_by_hit_ratio
+ relation = ">="
+ else
+ relation = "<"
+ end
+ reason = "hit ratio "
+ reason << "(#{hit_ratio}=#{estimated_n_records}/#{max_n_records}) "
+ reason << "#{relation} threshold (#{threshold})"
+ decide_use_range_index(use_range_index_by_hit_ratio, reason,
+ __LINE__, __method__)
+ end
+
+ def range_index_available_expression?(expression, line, method_name)
+ nested_reference_vector_column_accessor =
+ find_nested_reference_vector_column_accessor(expression)
+ if nested_reference_vector_column_accessor
+ reason = "nested reference vector column accessor can't be used: "
+ reason << "<#{nested_reference_vector_column_accessor.name}>"
+ return decide_use_range_index(false, reason, line, method_name)
+ end
+
+ selector_only_procedure = find_selector_only_procedure(expression)
+ if selector_only_procedure
+ reason = "selector only procedure can't be used: "
+ reason << "<#{selector_only_procedure.name}>"
+ return decide_use_range_index(false, reason, line, method_name)
+ end
+
+ true
+ end
+
+ def find_nested_reference_vector_column_accessor(expression)
+ expression.codes.each do |code|
+ value = code.value
+ next unless value.is_a?(Accessor)
+
+ sub_accessor = value
+ while sub_accessor.have_next?
+ object = sub_accessor.object
+ return value if object.is_a?(Column) and object.vector?
+ sub_accessor = sub_accessor.next
+ end
+ end
+ nil
+ end
+
+ def find_selector_only_procedure(expression)
+ expression.codes.each do |code|
+ value = code.value
+ return value if value.is_a?(Procedure) and value.selector_only?
+ end
+ nil
+ end
+
+ def execute_filter(range_index, expression_builder)
+ case @cover_type
+ when :all
+ filter_shard_all(range_index, expression_builder)
+ when :partial_min
+ if range_index
+ filter_by_range(range_index, expression_builder,
+ @target_range.min, @target_range.min_border,
+ nil, nil)
+ else
+ filter_table do |expression|
+ expression_builder.build_partial_min(expression)
+ end
+ end
+ when :partial_max
+ if range_index
+ filter_by_range(range_index, expression_builder,
+ nil, nil,
+ @target_range.max, @target_range.max_border)
+ else
+ filter_table do |expression|
+ expression_builder.build_partial_max(expression)
+ end
+ end
+ when :partial_min_and_max
+ if range_index
+ filter_by_range(range_index, expression_builder,
+ @target_range.min, @target_range.min_border,
+ @target_range.max, @target_range.max_border)
+ else
+ filter_table do |expression|
+ expression_builder.build_partial_min_and_max(expression)
+ end
+ end
+ end
end
- def filter_shard_all(range_index)
+ def filter_shard_all(range_index, expression_builder)
+ table = @shard.table
if @filter.nil?
- if @table.size <= @context.current_offset
- @context.current_offset -= @table.size
+ if table.size <= @context.current_offset
+ @context.current_offset -= table.size
return
end
if range_index
- filter_by_range(range_index,
+ filter_by_range(range_index, expression_builder,
nil, nil,
nil, nil)
else
- sort_result_set(@table)
+ sort_result_set(table)
end
else
if range_index
- filter_by_range(range_index,
+ filter_by_range(range_index, expression_builder,
nil, nil,
nil, nil)
else
filter_table do |expression|
- @expression_builder.build_all(expression)
+ expression_builder.build_all(expression)
end
end
end
@@ -313,7 +484,7 @@ module Groonga
end
end
- def filter_by_range(range_index,
+ def filter_by_range(range_index, expression_builder,
min, min_border, max, max_border)
lexicon = range_index.domain
data_table = range_index.range
@@ -336,6 +507,10 @@ module Groonga
else
options[:limit] = current_limit
end
+ max_n_unmatched_records =
+ compute_max_n_unmatched_records(data_table.size,
+ options[:limit])
+ options[:max_n_unmatched_records] = max_n_unmatched_records
if @filter
create_expression(data_table) do |expression|
expression.parse(@filter)
@@ -349,6 +524,17 @@ module Groonga
n_matched_records = index_cursor.select(result_set, options)
end
end
+ if n_matched_records == -1
+ result_set.close
+ fallback_message =
+ "fallback because there are too much unmatched records: "
+ fallback_message << "<#{max_n_unmatched_records}>"
+ decide_use_range_index(false,
+ fallback_message,
+ __LINE__, __method__)
+ execute_filter(nil, expression_builder)
+ return
+ end
end
rescue
result_set.close
@@ -393,10 +579,24 @@ module Groonga
flags
end
+ def compute_max_n_unmatched_records(data_table_size, limit)
+ max_n_unmatched_records = limit * 100
+ max_n_sample_records = data_table_size
+ if max_n_sample_records > 10000
+ sample_ratio = 1 / (Math.log(data_table_size) ** 2)
+ max_n_sample_records = (max_n_sample_records * sample_ratio).ceil
+ end
+ if max_n_unmatched_records > max_n_sample_records
+ max_n_unmatched_records = max_n_sample_records
+ end
+ max_n_unmatched_records
+ end
+
def filter_table
- create_expression(@table) do |expression|
+ table = @shard.table
+ create_expression(table) do |expression|
yield(expression)
- result_set = @table.select(expression)
+ result_set = table.select(expression)
sort_result_set(result_set)
end
end