diff options
Diffstat (limited to 'storage/mroonga/vendor/groonga/plugins')
50 files changed, 3578 insertions, 1386 deletions
diff --git a/storage/mroonga/vendor/groonga/plugins/CMakeLists.txt b/storage/mroonga/vendor/groonga/plugins/CMakeLists.txt index 6d6a8df5d5e..244b60f9004 100644 --- a/storage/mroonga/vendor/groonga/plugins/CMakeLists.txt +++ b/storage/mroonga/vendor/groonga/plugins/CMakeLists.txt @@ -1,4 +1,4 @@ -# Copyright(C) 2012-2015 Brazil +# Copyright(C) 2012-2016 Brazil # # This library is free software; you can redistribute it and/or # modify it under the terms of the GNU Lesser General Public @@ -15,7 +15,6 @@ add_subdirectory(suggest) add_subdirectory(tokenizers) -add_subdirectory(table) add_subdirectory(query_expanders) add_subdirectory(ruby) add_subdirectory(token_filters) diff --git a/storage/mroonga/vendor/groonga/plugins/Makefile.am b/storage/mroonga/vendor/groonga/plugins/Makefile.am index dc3967899c2..6c98a0cc3f5 100644 --- a/storage/mroonga/vendor/groonga/plugins/Makefile.am +++ b/storage/mroonga/vendor/groonga/plugins/Makefile.am @@ -1,12 +1,12 @@ SUBDIRS = \ tokenizers \ suggest \ - table \ query_expanders \ ruby \ token_filters \ sharding \ - functions + functions \ + expression_rewriters EXTRA_DIST = \ CMakeLists.txt diff --git a/storage/mroonga/vendor/groonga/plugins/table/CMakeLists.txt b/storage/mroonga/vendor/groonga/plugins/expression_rewriters/CMakeLists.txt index bd423a830b3..385b5c750d2 100644 --- a/storage/mroonga/vendor/groonga/plugins/table/CMakeLists.txt +++ b/storage/mroonga/vendor/groonga/plugins/expression_rewriters/CMakeLists.txt @@ -1,4 +1,4 @@ -# Copyright(C) 2012-2013 Brazil +# Copyright(C) 2015 Brazil # # This library is free software; you can redistribute it and/or # modify it under the terms of the GNU Lesser General Public @@ -13,17 +13,14 @@ # License along with this library; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA -include_directories( - ${CMAKE_CURRENT_SOURCE_DIR}/../../lib - ${MRUBY_INCLUDE_DIRS}) - -read_file_list(${CMAKE_CURRENT_SOURCE_DIR}/sources.am 
TABLE_SOURCES) if(NOT GRN_EMBED) - add_library(table MODULE ${TABLE_SOURCES}) - set_source_files_properties(${TABLE_SOURCES} - PROPERTIES - COMPILE_FLAGS "${GRN_C_COMPILE_FLAGS}") - set_target_properties(table PROPERTIES PREFIX "") - target_link_libraries(table libgroonga) - install(TARGETS table DESTINATION "${GRN_RELATIVE_PLUGINS_DIR}/table") + if(GRN_WITH_MRUBY) + set(GRN_RELATIVE_EXPRESSION_REWRITER_PLUGINS_DIR + "${GRN_RELATIVE_PLUGINS_DIR}/expression_rewriters") + + read_file_list(${CMAKE_CURRENT_SOURCE_DIR}/sources.am + EXPRESSION_REWRITERS) + install(FILES ${EXPRESSION_REWRITERS} + DESTINATION "${GRN_RELATIVE_SEXPRESSION_REWRITER_PLUGINS_DIR}") + endif() endif() diff --git a/storage/mroonga/vendor/groonga/plugins/expression_rewriters/Makefile.am b/storage/mroonga/vendor/groonga/plugins/expression_rewriters/Makefile.am new file mode 100644 index 00000000000..60a032acca7 --- /dev/null +++ b/storage/mroonga/vendor/groonga/plugins/expression_rewriters/Makefile.am @@ -0,0 +1,9 @@ +EXTRA_DIST = \ + CMakeLists.txt + +if WITH_MRUBY +dist_expression_rewriter_plugins_DATA = \ + $(expression_rewriters) +endif + +include sources.am diff --git a/storage/mroonga/vendor/groonga/plugins/expression_rewriters/optimizer.rb b/storage/mroonga/vendor/groonga/plugins/expression_rewriters/optimizer.rb new file mode 100644 index 00000000000..3dfee681d52 --- /dev/null +++ b/storage/mroonga/vendor/groonga/plugins/expression_rewriters/optimizer.rb @@ -0,0 +1,147 @@ +module Groonga + module ExpressionRewriters + class Optimizer < ExpressionRewriter + register "optimizer" + + def rewrite + builder = ExpressionTreeBuilder.new(@expression) + root_node = builder.build + + variable = @expression[0] + table = context[variable.domain] + optimized_root_node = optimize_node(table, root_node) + + rewritten = Expression.create(table) + optimized_root_node.build(rewritten) + rewritten + end + + private + def optimize_node(table, node) + case node + when ExpressionTree::LogicalOperation + 
optimized_sub_nodes = node.nodes.collect do |sub_node| + optimize_node(table, sub_node) + end + case node.operator + when Operator::AND + optimized_sub_nodes = + optimize_and_sub_nodes(table, optimized_sub_nodes) + end + ExpressionTree::LogicalOperation.new(node.operator, + optimized_sub_nodes) + when ExpressionTree::BinaryOperation + optimized_left = optimize_node(table, node.left) + optimized_right = optimize_node(table, node.right) + if optimized_left.is_a?(ExpressionTree::Constant) and + optimized_right.is_a?(ExpressionTree::Variable) + ExpressionTree::BinaryOperation.new(node.operator, + optimized_right, + optimized_left) + elsif node.left == optimized_left and node.right == optimized_right + node + else + ExpressionTree::BinaryOperation.new(node.operator, + optimized_left, + optimized_right) + end + else + node + end + end + + def optimize_and_sub_nodes(table, sub_nodes) + grouped_sub_nodes = sub_nodes.group_by do |sub_node| + case sub_node + when ExpressionTree::BinaryOperation + if sub_node.left.is_a?(ExpressionTree::Variable) + sub_node.left.column + else + nil + end + else + nil + end + end + + optimized_nodes = [] + grouped_sub_nodes.each do |column, grouped_nodes| + if column + grouped_nodes = optimize_grouped_nodes(column, grouped_nodes) + end + optimized_nodes.concat(grouped_nodes) + end + + optimized_nodes.sort_by do |node| + node.estimate_size(table) + end + end + + COMPARISON_OPERATORS = [ + Operator::EQUAL, + Operator::NOT_EQUAL, + Operator::LESS, + Operator::GREATER, + Operator::LESS_EQUAL, + Operator::GREATER_EQUAL, + ] + def optimize_grouped_nodes(column, grouped_nodes) + target_nodes, done_nodes = grouped_nodes.partition do |node| + node.is_a?(ExpressionTree::BinaryOperation) and + COMPARISON_OPERATORS.include?(node.operator) and + node.right.is_a?(ExpressionTree::Constant) + end + + # TODO: target_nodes = remove_needless_nodes(target_nodes) + # e.g.: x < 1 && x < 3 -> x < 1: (x < 3) is meaningless + + if target_nodes.size == 2 + between_node 
= try_optimize_between(column, target_nodes) + if between_node + done_nodes << between_node + else + done_nodes.concat(target_nodes) + end + else + done_nodes.concat(target_nodes) + end + + done_nodes + end + + def try_optimize_between(column, target_nodes) + greater_node = nil + less_node = nil + target_nodes.each do |node| + case node.operator + when Operator::GREATER, Operator::GREATER_EQUAL + greater_node = node + when Operator::LESS, Operator::LESS_EQUAL + less_node = node + end + end + return nil if greater_node.nil? or less_node.nil? + + between = ExpressionTree::Procedure.new(context["between"]) + if greater_node.operator == Operator::GREATER + greater_border = "exclude" + else + greater_border = "include" + end + if less_node.operator == Operator::LESS + less_border = "exclude" + else + less_border = "include" + end + arguments = [ + ExpressionTree::Variable.new(column), + greater_node.right, + ExpressionTree::Constant.new(greater_border), + less_node.right, + ExpressionTree::Constant.new(less_border), + ] + ExpressionTree::FunctionCall.new(between, arguments) + end + end + end +end diff --git a/storage/mroonga/vendor/groonga/plugins/expression_rewriters/sources.am b/storage/mroonga/vendor/groonga/plugins/expression_rewriters/sources.am new file mode 100644 index 00000000000..7670bed6126 --- /dev/null +++ b/storage/mroonga/vendor/groonga/plugins/expression_rewriters/sources.am @@ -0,0 +1,2 @@ +expression_rewriters = \ + optimizer.rb diff --git a/storage/mroonga/vendor/groonga/plugins/functions/CMakeLists.txt b/storage/mroonga/vendor/groonga/plugins/functions/CMakeLists.txt index d831589b28b..611e30b9c48 100644 --- a/storage/mroonga/vendor/groonga/plugins/functions/CMakeLists.txt +++ b/storage/mroonga/vendor/groonga/plugins/functions/CMakeLists.txt @@ -1,4 +1,4 @@ -# Copyright(C) 2015 Brazil +# Copyright(C) 2015-2017 Brazil # # This library is free software; you can redistribute it and/or # modify it under the terms of the GNU Lesser General Public @@ -38,3 
+38,104 @@ else() install(TARGETS vector_functions DESTINATION "${GRN_FUNCTIONS_PLUGIN_DIR}") endif() target_link_libraries(vector_functions libgroonga) + +read_file_list(${CMAKE_CURRENT_SOURCE_DIR}/string_sources.am + STRING_SOURCES) +set_source_files_properties(${STRING_SOURCES} + PROPERTIES + COMPILE_FLAGS "${GRN_C_COMPILE_FLAGS}") +if(GRN_EMBED) + add_library(string_functions STATIC ${STRING_SOURCES}) + set_target_properties( + string_functions + PROPERTIES + POSITION_INDEPENDENT_CODE ON) +else() + add_library(string_functions MODULE ${STRING_SOURCES}) + set_target_properties(string_functions PROPERTIES + PREFIX "" + OUTPUT_NAME "string") + install(TARGETS string_functions DESTINATION "${GRN_FUNCTIONS_PLUGIN_DIR}") +endif() +target_link_libraries(string_functions libgroonga) + +read_file_list(${CMAKE_CURRENT_SOURCE_DIR}/number_sources.am + NUMBER_SOURCES) +set_source_files_properties(${NUMBER_SOURCES} + PROPERTIES + COMPILE_FLAGS "${GRN_C_COMPILE_FLAGS}") +if(GRN_EMBED) + add_library(number_functions STATIC ${NUMBER_SOURCES}) + set_target_properties( + number_functions + PROPERTIES + POSITION_INDEPENDENT_CODE ON) +else() + add_library(number_functions MODULE ${NUMBER_SOURCES}) + set_target_properties(number_functions PROPERTIES + PREFIX "" + OUTPUT_NAME "number") + install(TARGETS number_functions DESTINATION "${GRN_FUNCTIONS_PLUGIN_DIR}") +endif() +target_link_libraries(number_functions libgroonga "${M_LIBS}") + +read_file_list(${CMAKE_CURRENT_SOURCE_DIR}/time_sources.am + TIME_SOURCES) +set_source_files_properties(${TIME_SOURCES} + PROPERTIES + COMPILE_FLAGS "${GRN_C_COMPILE_FLAGS}") +if(GRN_EMBED) + add_library(time_functions STATIC ${TIME_SOURCES}) + set_target_properties( + time_functions + PROPERTIES + POSITION_INDEPENDENT_CODE ON) +else() + add_library(time_functions MODULE ${TIME_SOURCES}) + set_target_properties(time_functions PROPERTIES + PREFIX "" + OUTPUT_NAME "time") + install(TARGETS time_functions DESTINATION "${GRN_FUNCTIONS_PLUGIN_DIR}") 
+endif() +target_link_libraries(time_functions libgroonga) + +read_file_list(${CMAKE_CURRENT_SOURCE_DIR}/index_column_sources.am + INDEX_COLUMN_SOURCES) +set_source_files_properties(${INDEX_COLUMN_SOURCES} + PROPERTIES + COMPILE_FLAGS "${GRN_C_COMPILE_FLAGS}") +if(GRN_EMBED) + add_library(index_column_functions STATIC ${INDEX_COLUMN_SOURCES}) + set_target_properties( + index_column_functions + PROPERTIES + POSITION_INDEPENDENT_CODE ON) +else() + add_library(index_column_functions MODULE ${INDEX_COLUMN_SOURCES}) + set_target_properties(index_column_functions PROPERTIES + PREFIX "" + OUTPUT_NAME "index_column") + install(TARGETS index_column_functions + DESTINATION "${GRN_FUNCTIONS_PLUGIN_DIR}") +endif() +target_link_libraries(index_column_functions libgroonga) + +read_file_list(${CMAKE_CURRENT_SOURCE_DIR}/math_sources.am + MATH_SOURCES) +set_source_files_properties(${MATH_SOURCES} + PROPERTIES + COMPILE_FLAGS "${GRN_C_COMPILE_FLAGS}") +if(GRN_EMBED) + add_library(math_functions STATIC ${MATH_SOURCES}) + set_target_properties( + math_functions + PROPERTIES + POSITION_INDEPENDENT_CODE ON) +else() + add_library(math_functions MODULE ${MATH_SOURCES}) + set_target_properties(math_functions PROPERTIES + PREFIX "" + OUTPUT_NAME "math") + install(TARGETS math_functions DESTINATION "${GRN_FUNCTIONS_PLUGIN_DIR}") +endif() +target_link_libraries(math_functions libgroonga) diff --git a/storage/mroonga/vendor/groonga/plugins/functions/Makefile.am b/storage/mroonga/vendor/groonga/plugins/functions/Makefile.am index 5d8d1d9cc84..f57ee031afe 100644 --- a/storage/mroonga/vendor/groonga/plugins/functions/Makefile.am +++ b/storage/mroonga/vendor/groonga/plugins/functions/Makefile.am @@ -16,5 +16,18 @@ LIBS = \ function_plugins_LTLIBRARIES = function_plugins_LTLIBRARIES += vector.la +function_plugins_LTLIBRARIES += string.la +function_plugins_LTLIBRARIES += number.la +function_plugins_LTLIBRARIES += time.la +function_plugins_LTLIBRARIES += index_column.la +function_plugins_LTLIBRARIES 
+= math.la include vector_sources.am +include string_sources.am +include number_sources.am +include time_sources.am +include index_column_sources.am +include math_sources.am + +number_la_LIBADD = -lm +math_la_LIBADD = -lm diff --git a/storage/mroonga/vendor/groonga/plugins/functions/index_column.c b/storage/mroonga/vendor/groonga/plugins/functions/index_column.c new file mode 100644 index 00000000000..acb7355cf5c --- /dev/null +++ b/storage/mroonga/vendor/groonga/plugins/functions/index_column.c @@ -0,0 +1,266 @@ +/* -*- c-basic-offset: 2 -*- */ +/* + Copyright(C) 2017 Brazil + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License version 2.1 as published by the Free Software Foundation. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +*/ + +#ifdef GRN_EMBEDDED +# define GRN_PLUGIN_FUNCTION_TAG functions_time +#endif + +#include <groonga/plugin.h> + +static grn_rc +selector_index_column_df_ratio_between(grn_ctx *ctx, + grn_obj *table, + grn_obj *index, + int n_args, + grn_obj **args, + grn_obj *res, + grn_operator op) +{ + grn_rc rc = GRN_SUCCESS; + grn_obj *index_column; + grn_ii *ii; + double min; + double max; + grn_obj *source_table; + unsigned int n_documents; + grn_posting posting; + + if ((n_args - 1) != 3) { + GRN_PLUGIN_ERROR(ctx, + GRN_INVALID_ARGUMENT, + "index_column_df_ratio_between(): " + "wrong number of arguments (%d for 3)", n_args - 1); + rc = ctx->rc; + goto exit; + } + + index_column = args[1]; + ii = (grn_ii *)index_column; + min = GRN_FLOAT_VALUE(args[2]); + max = GRN_FLOAT_VALUE(args[3]); + + source_table = grn_ctx_at(ctx, grn_obj_get_range(ctx, index_column)); + n_documents = grn_table_size(ctx, source_table); + memset(&posting, 0, sizeof(grn_posting)); + posting.sid = 1; + + if (op == GRN_OP_AND) { + GRN_TABLE_EACH_BEGIN(ctx, res, cursor, record_id) { + void *key; + grn_id term_id; + uint32_t n_match_documents; + double df_ratio; + + grn_table_cursor_get_key(ctx, cursor, &key); + term_id = *(grn_id *)key; + n_match_documents = grn_ii_estimate_size(ctx, ii, term_id); + if (n_match_documents > n_documents) { + n_match_documents = n_documents; + } + df_ratio = (double)n_match_documents / (double)n_documents; + if (min <= df_ratio && df_ratio <= max) { + posting.rid = term_id; + grn_ii_posting_add(ctx, &posting, (grn_hash *)res, op); + } + } GRN_TABLE_EACH_END(ctx, cursor); + grn_ii_resolve_sel_and(ctx, (grn_hash *)res, op); + } else { + GRN_TABLE_EACH_BEGIN(ctx, table, cursor, term_id) { + uint32_t n_match_documents; + double df_ratio; + + n_match_documents = 
grn_ii_estimate_size(ctx, ii, term_id); + if (n_match_documents > n_documents) { + n_match_documents = n_documents; + } + df_ratio = (double)n_match_documents / (double)n_documents; + { + void *key; + int key_size; + key_size = grn_table_cursor_get_key(ctx, cursor, &key); + } + if (min <= df_ratio && df_ratio <= max) { + posting.rid = term_id; + grn_ii_posting_add(ctx, &posting, (grn_hash *)res, op); + } + } GRN_TABLE_EACH_END(ctx, cursor); + } + +exit : + return rc; +} + +static grn_obj * +func_index_column_df_ratio(grn_ctx *ctx, + int n_args, + grn_obj **args, + grn_user_data *user_data) +{ + grn_obj *term_table; + grn_obj *index_column_name; + grn_obj *index_column; + grn_ii *ii; + grn_id term_id; + + if (n_args != 1) { + GRN_PLUGIN_ERROR(ctx, + GRN_INVALID_ARGUMENT, + "index_column_df_ratio(): " + "wrong number of arguments (%d for 1)", n_args - 1); + return NULL; + } + + { + grn_obj *expr; + grn_obj *variable; + + expr = grn_plugin_proc_get_caller(ctx, user_data); + if (!expr) { + GRN_PLUGIN_ERROR(ctx, + GRN_INVALID_ARGUMENT, + "index_column_df_ratio(): " + "called directly"); + return NULL; + } + + variable = grn_expr_get_var_by_offset(ctx, expr, 0); + if (!variable) { + GRN_PLUGIN_ERROR(ctx, + GRN_INVALID_ARGUMENT, + "index_column_df_ratio(): " + "caller expression must have target record information"); + return NULL; + } + + term_table = grn_ctx_at(ctx, variable->header.domain); + term_id = GRN_RECORD_VALUE(variable); + while (GRN_TRUE) { + grn_obj *key_type; + + key_type = grn_ctx_at(ctx, term_table->header.domain); + if (!grn_obj_is_table(ctx, key_type)) { + break; + } + + grn_table_get_key(ctx, term_table, term_id, &term_id, sizeof(grn_id)); + term_table = key_type; + } + } + + index_column_name = args[0]; + if (!grn_obj_is_text_family_bulk(ctx, index_column_name)) { + grn_obj inspected; + GRN_TEXT_INIT(&inspected, 0); + grn_inspect(ctx, &inspected, index_column_name); + GRN_PLUGIN_ERROR(ctx, + GRN_INVALID_ARGUMENT, + "index_column_df_ratio(): " + "the 
first argument must be index column name: %.*s", + (int)GRN_TEXT_LEN(&inspected), + GRN_TEXT_VALUE(&inspected)); + GRN_OBJ_FIN(ctx, &inspected); + return NULL; + } + + index_column = grn_obj_column(ctx, + term_table, + GRN_TEXT_VALUE(index_column_name), + GRN_TEXT_LEN(index_column_name)); + if (!index_column) { + GRN_PLUGIN_ERROR(ctx, + GRN_INVALID_ARGUMENT, + "index_column_df_ratio(): " + "nonexistent object: <%.*s>", + (int)GRN_TEXT_LEN(index_column_name), + GRN_TEXT_VALUE(index_column_name)); + return NULL; + } + + if (!grn_obj_is_index_column(ctx, index_column)) { + grn_obj inspected; + GRN_TEXT_INIT(&inspected, 0); + grn_inspect(ctx, &inspected, index_column); + GRN_PLUGIN_ERROR(ctx, + GRN_INVALID_ARGUMENT, + "index_column_df_ratio(): " + "the first argument must be index column: %.*s", + (int)GRN_TEXT_LEN(&inspected), + GRN_TEXT_VALUE(&inspected)); + GRN_OBJ_FIN(ctx, &inspected); + if (grn_obj_is_accessor(ctx, index_column)) { + grn_obj_unlink(ctx, index_column); + } + return NULL; + } + + ii = (grn_ii *)index_column; + + { + grn_obj *source_table; + unsigned int n_documents; + uint32_t n_match_documents; + double df_ratio; + grn_obj *df_ratio_value; + + source_table = grn_ctx_at(ctx, grn_obj_get_range(ctx, index_column)); + n_documents = grn_table_size(ctx, source_table); + n_match_documents = grn_ii_estimate_size(ctx, ii, term_id); + if (n_match_documents > n_documents) { + n_match_documents = n_documents; + } + df_ratio = (double)n_match_documents / (double)n_documents; + + df_ratio_value = grn_plugin_proc_alloc(ctx, user_data, GRN_DB_FLOAT, 0); + if (!df_ratio_value) { + return NULL; + } + GRN_FLOAT_SET(ctx, df_ratio_value, df_ratio); + return df_ratio_value; + } +} + +grn_rc +GRN_PLUGIN_INIT(grn_ctx *ctx) +{ + return ctx->rc; +} + +grn_rc +GRN_PLUGIN_REGISTER(grn_ctx *ctx) +{ + grn_obj *selector_proc; + + selector_proc = grn_proc_create(ctx, "index_column_df_ratio_between", -1, + GRN_PROC_FUNCTION, + NULL, NULL, NULL, 0, NULL); + 
grn_proc_set_selector(ctx, selector_proc, + selector_index_column_df_ratio_between); + grn_proc_set_selector_operator(ctx, selector_proc, GRN_OP_NOP); + + grn_proc_create(ctx, "index_column_df_ratio", -1, + GRN_PROC_FUNCTION, + func_index_column_df_ratio, NULL, NULL, 0, NULL); + + return ctx->rc; +} + +grn_rc +GRN_PLUGIN_FIN(grn_ctx *ctx) +{ + return GRN_SUCCESS; +} diff --git a/storage/mroonga/vendor/groonga/plugins/functions/index_column_sources.am b/storage/mroonga/vendor/groonga/plugins/functions/index_column_sources.am new file mode 100644 index 00000000000..261907bc570 --- /dev/null +++ b/storage/mroonga/vendor/groonga/plugins/functions/index_column_sources.am @@ -0,0 +1,2 @@ +index_column_la_SOURCES = \ + index_column.c diff --git a/storage/mroonga/vendor/groonga/plugins/functions/math.c b/storage/mroonga/vendor/groonga/plugins/functions/math.c new file mode 100644 index 00000000000..2527012e9ea --- /dev/null +++ b/storage/mroonga/vendor/groonga/plugins/functions/math.c @@ -0,0 +1,142 @@ +/* -*- c-basic-offset: 2 -*- */ +/* + Copyright(C) 2017 Brazil + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License version 2.1 as published by the Free Software Foundation. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +*/ + +#ifdef GRN_EMBEDDED +# define GRN_PLUGIN_FUNCTION_TAG functions_math +#endif + +#include <groonga/plugin.h> + +#include <math.h> +#include <stdlib.h> + +static grn_obj * +func_math_abs(grn_ctx *ctx, int n_args, grn_obj **args, + grn_user_data *user_data) +{ + grn_obj *number; + grn_obj *grn_abs_number = NULL; + + if (n_args != 1) { + GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT, + "math_abs(): wrong number of arguments (%d for 1)", + n_args); + return NULL; + } + + number = args[0]; + if (!(number->header.type == GRN_BULK && + grn_type_id_is_number_family(ctx, number->header.domain))) { + grn_obj inspected; + + GRN_TEXT_INIT(&inspected, 0); + grn_inspect(ctx, &inspected, number); + GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT, + "math_abs(): the first argument must be a number: " + "<%.*s>", + (int)GRN_TEXT_LEN(&inspected), + GRN_TEXT_VALUE(&inspected)); + GRN_OBJ_FIN(ctx, &inspected); + return NULL; + } + +#define ABS_AS_IS(return_type, to_type, getter, setter) { \ + grn_abs_number = grn_plugin_proc_alloc(ctx, \ + user_data, \ + (return_type), \ + 0); \ + if (!grn_abs_number) { \ + return NULL; \ + } \ + setter(ctx, grn_abs_number, getter(number)); \ + } +#define ABS_CONVERT_TYPE(func, return_type, to_type, getter, setter) { \ + grn_abs_number = grn_plugin_proc_alloc(ctx, \ + user_data, \ + (return_type), \ + 0); \ + if (!grn_abs_number) { \ + return NULL; \ + } else { \ + to_type abs_number_raw = (to_type)(func)(getter(number)); \ + setter(ctx, grn_abs_number, abs_number_raw); \ + } \ + } + + switch (number->header.domain) { + case GRN_DB_INT8: + ABS_CONVERT_TYPE(abs, GRN_DB_UINT8, uint8_t, GRN_INT8_VALUE, GRN_UINT8_SET); + break; + case GRN_DB_UINT8: + ABS_AS_IS(GRN_DB_UINT8, uint8_t, GRN_UINT8_VALUE, GRN_UINT8_SET); + break; + case 
GRN_DB_INT16: + ABS_CONVERT_TYPE(abs, GRN_DB_UINT16, uint16_t, GRN_INT16_VALUE, GRN_UINT16_SET); + break; + case GRN_DB_UINT16: + ABS_AS_IS(GRN_DB_UINT16, uint16_t, GRN_UINT16_VALUE, GRN_UINT16_SET); + break; + case GRN_DB_INT32: + ABS_CONVERT_TYPE(labs, GRN_DB_UINT32, uint32_t, GRN_INT32_VALUE, GRN_UINT32_SET); + break; + case GRN_DB_UINT32: + ABS_AS_IS(GRN_DB_UINT32, uint32_t, GRN_UINT32_VALUE, GRN_UINT32_SET); + break; + case GRN_DB_INT64: + ABS_CONVERT_TYPE(llabs, GRN_DB_UINT64, uint64_t, GRN_INT64_VALUE, GRN_UINT64_SET); + break; + case GRN_DB_UINT64: + ABS_AS_IS(GRN_DB_UINT64, uint64_t, GRN_UINT64_VALUE, GRN_UINT64_SET); + break; + case GRN_DB_FLOAT: + ABS_CONVERT_TYPE(fabs, GRN_DB_FLOAT, double, GRN_FLOAT_VALUE, GRN_FLOAT_SET); + break; + default : + break; + } +#undef ABS_CONVERT_TYPE +#undef ABS_AS_IS + + return grn_abs_number; +} + +grn_rc +GRN_PLUGIN_INIT(grn_ctx *ctx) +{ + return ctx->rc; +} + +grn_rc +GRN_PLUGIN_REGISTER(grn_ctx *ctx) +{ + grn_rc rc = GRN_SUCCESS; + + grn_proc_create(ctx, + "math_abs", -1, + GRN_PROC_FUNCTION, + func_math_abs, + NULL, NULL, 0, NULL); + + return rc; +} + +grn_rc +GRN_PLUGIN_FIN(grn_ctx *ctx) +{ + return GRN_SUCCESS; +} diff --git a/storage/mroonga/vendor/groonga/plugins/functions/math_sources.am b/storage/mroonga/vendor/groonga/plugins/functions/math_sources.am new file mode 100644 index 00000000000..8c14ca7469d --- /dev/null +++ b/storage/mroonga/vendor/groonga/plugins/functions/math_sources.am @@ -0,0 +1,2 @@ +math_la_SOURCES = \ + math.c diff --git a/storage/mroonga/vendor/groonga/plugins/functions/number.c b/storage/mroonga/vendor/groonga/plugins/functions/number.c new file mode 100644 index 00000000000..285c88f58f2 --- /dev/null +++ b/storage/mroonga/vendor/groonga/plugins/functions/number.c @@ -0,0 +1,187 @@ +/* -*- c-basic-offset: 2 -*- */ +/* + Copyright(C) 2016 Brazil + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License version 
2.1 as published by the Free Software Foundation. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +*/ + +#ifdef GRN_EMBEDDED +# define GRN_PLUGIN_FUNCTION_TAG functions_number +#endif + +#include <groonga/plugin.h> + +#include <math.h> + +static grn_obj * +func_number_classify(grn_ctx *ctx, int n_args, grn_obj **args, + grn_user_data *user_data) +{ + grn_obj *number; + grn_obj *interval; + grn_obj casted_interval; + grn_obj *classed_number; + + if (n_args != 2) { + GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT, + "number_classify(): wrong number of arguments (%d for 2)", + n_args); + return NULL; + } + + number = args[0]; + if (!(number->header.type == GRN_BULK && + grn_type_id_is_number_family(ctx, number->header.domain))) { + grn_obj inspected; + + GRN_TEXT_INIT(&inspected, 0); + grn_inspect(ctx, &inspected, number); + GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT, + "number_classify(): the first argument must be a number: " + "<%.*s>", + (int)GRN_TEXT_LEN(&inspected), + GRN_TEXT_VALUE(&inspected)); + GRN_OBJ_FIN(ctx, &inspected); + return NULL; + } + + interval = args[1]; + if (!(interval->header.type == GRN_BULK && + grn_type_id_is_number_family(ctx, interval->header.domain))) { + grn_obj inspected; + + GRN_TEXT_INIT(&inspected, 0); + grn_inspect(ctx, &inspected, interval); + GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT, + "number_classify(): the second argument must be a number: " + "<%.*s>", + (int)GRN_TEXT_LEN(&inspected), + GRN_TEXT_VALUE(&inspected)); + GRN_OBJ_FIN(ctx, &inspected); + return NULL; + } + + classed_number = grn_plugin_proc_alloc(ctx, + 
user_data, + number->header.domain, + 0); + if (!classed_number) { + return NULL; + } + + GRN_VALUE_FIX_SIZE_INIT(&casted_interval, 0, number->header.domain); + grn_obj_cast(ctx, interval, &casted_interval, GRN_FALSE); + +#define CLASSIFY_RAW(type, getter, setter, classifier) { \ + type number_raw; \ + type interval_raw; \ + type class_raw; \ + type classed_number_raw; \ + \ + number_raw = getter(number); \ + interval_raw = getter(&casted_interval); \ + class_raw = classifier(number_raw, interval_raw); \ + classed_number_raw = class_raw * interval_raw; \ + setter(ctx, classed_number, classed_number_raw); \ + } + +#define CLASSIFIER_INT(number_raw, interval_raw) \ + (number_raw) < 0 ? \ + ((((number_raw) + 1) / (interval_raw)) - 1) : \ + (((number_raw) / (interval_raw))) + +#define CLASSIFY_INT(type, getter, setter) \ + CLASSIFY_RAW(type, getter, setter, CLASSIFIER_INT) + +#define CLASSIFIER_UINT(number_raw, interval_raw) \ + ((number_raw) / (interval_raw)) + +#define CLASSIFY_UINT(type, getter, setter) \ + CLASSIFY_RAW(type, getter, setter, CLASSIFIER_UINT) + +#define CLASSIFIER_FLOAT(number_raw, interval_raw) \ + floor((number_raw) / (interval_raw)) + +#define CLASSIFY_FLOAT(getter, setter) \ + CLASSIFY_RAW(double, getter, setter, CLASSIFIER_FLOAT) + + switch (number->header.domain) { + case GRN_DB_INT8 : + CLASSIFY_INT(int8_t, GRN_INT8_VALUE, GRN_INT8_SET); + break; + case GRN_DB_UINT8 : + CLASSIFY_UINT(uint8_t, GRN_UINT8_VALUE, GRN_UINT8_SET); + break; + case GRN_DB_INT16 : + CLASSIFY_INT(int16_t, GRN_INT16_VALUE, GRN_INT16_SET); + break; + case GRN_DB_UINT16 : + CLASSIFY_UINT(uint16_t, GRN_UINT16_VALUE, GRN_UINT16_SET); + break; + case GRN_DB_INT32 : + CLASSIFY_INT(int32_t, GRN_INT32_VALUE, GRN_INT32_SET); + break; + case GRN_DB_UINT32 : + CLASSIFY_UINT(uint32_t, GRN_UINT32_VALUE, GRN_UINT32_SET); + break; + case GRN_DB_INT64 : + CLASSIFY_INT(int64_t, GRN_INT64_VALUE, GRN_INT64_SET); + break; + case GRN_DB_UINT64 : + CLASSIFY_UINT(uint64_t, GRN_UINT64_VALUE, 
GRN_UINT64_SET); + break; + case GRN_DB_FLOAT : + CLASSIFY_FLOAT(GRN_FLOAT_VALUE, GRN_FLOAT_SET); + break; + default : + break; + } +#undef CLASSIFY_FLOAT +#undef CLASSIFIER_FLAOT +#undef CLASSIFY_UINT +#undef CLASSIFIER_UINT +#undef CLASSIFY_INT +#undef CLASSIFIER_INT +#undef CLASSIFY_RAW + + GRN_OBJ_FIN(ctx, &casted_interval); + + return classed_number; +} + +grn_rc +GRN_PLUGIN_INIT(grn_ctx *ctx) +{ + return ctx->rc; +} + +grn_rc +GRN_PLUGIN_REGISTER(grn_ctx *ctx) +{ + grn_rc rc = GRN_SUCCESS; + + grn_proc_create(ctx, + "number_classify", -1, + GRN_PROC_FUNCTION, + func_number_classify, + NULL, NULL, 0, NULL); + + return rc; +} + +grn_rc +GRN_PLUGIN_FIN(grn_ctx *ctx) +{ + return GRN_SUCCESS; +} diff --git a/storage/mroonga/vendor/groonga/plugins/functions/number_sources.am b/storage/mroonga/vendor/groonga/plugins/functions/number_sources.am new file mode 100644 index 00000000000..b3d9483be9a --- /dev/null +++ b/storage/mroonga/vendor/groonga/plugins/functions/number_sources.am @@ -0,0 +1,2 @@ +number_la_SOURCES = \ + number.c diff --git a/storage/mroonga/vendor/groonga/plugins/functions/string.c b/storage/mroonga/vendor/groonga/plugins/functions/string.c new file mode 100644 index 00000000000..aeef335ec55 --- /dev/null +++ b/storage/mroonga/vendor/groonga/plugins/functions/string.c @@ -0,0 +1,299 @@ +/* -*- c-basic-offset: 2 -*- */ +/* + Copyright(C) 2016 Brazil + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License version 2.1 as published by the Free Software Foundation. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +*/ + +#ifdef GRN_EMBEDDED +# define GRN_PLUGIN_FUNCTION_TAG functions_string +#endif + +#include <groonga/plugin.h> + +/* + * func_string_length() returns the number of characters in a string. + * If the string contains an invalid byte sequence, this function returns the + * number of characters before the invalid byte sequence. + */ +static grn_obj * +func_string_length(grn_ctx *ctx, int n_args, grn_obj **args, + grn_user_data *user_data) +{ + grn_obj *target; + unsigned int length = 0; + grn_obj *grn_length; + + if (n_args != 1) { + GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT, + "string_length(): wrong number of arguments (%d for 1)", + n_args); + return NULL; + } + + target = args[0]; + if (!(target->header.type == GRN_BULK && + ((target->header.domain == GRN_DB_SHORT_TEXT) || + (target->header.domain == GRN_DB_TEXT) || + (target->header.domain == GRN_DB_LONG_TEXT)))) { + grn_obj inspected; + + GRN_TEXT_INIT(&inspected, 0); + grn_inspect(ctx, &inspected, target); + GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT, + "string_length(): target object must be a text bulk: " + "<%.*s>", + (int)GRN_TEXT_LEN(&inspected), + GRN_TEXT_VALUE(&inspected)); + GRN_OBJ_FIN(ctx, &inspected); + return NULL; + } + + { + const char *s = GRN_TEXT_VALUE(target); + const char *e = GRN_TEXT_VALUE(target) + GRN_TEXT_LEN(target); + const char *p; + unsigned int cl = 0; + for (p = s; p < e && (cl = grn_charlen(ctx, p, e)); p += cl) { + length++; + } + } + + grn_length = grn_plugin_proc_alloc(ctx, user_data, GRN_DB_UINT32, 0); + if (!grn_length) { + return NULL; + } + + GRN_UINT32_SET(ctx, grn_length, length); + + return grn_length; +} + +static grn_obj * +func_string_substring(grn_ctx *ctx, int n_args, grn_obj **args, + grn_user_data *user_data) +{ + grn_obj *target; + grn_obj 
*from_raw; + grn_obj *length_raw = NULL; + int64_t from = 0; + int64_t length = -1; + const char *start = NULL; + const char *end = NULL; + grn_obj *substring; + + if (n_args < 2 || n_args > 3) { /* accept exactly (target, from) or (target, from, length), as the message states */ + GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT, + "string_substring(): wrong number of arguments (%d for 2..3)", + n_args); + return NULL; + } + + target = args[0]; + from_raw = args[1]; + if (n_args == 3) { + length_raw = args[2]; + } + + if (!(target->header.type == GRN_BULK && + grn_type_id_is_text_family(ctx, target->header.domain))) { + grn_obj inspected; + + GRN_TEXT_INIT(&inspected, 0); + grn_inspect(ctx, &inspected, target); + GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT, + "string_substring(): target object must be a text bulk: " + "<%.*s>", + (int)GRN_TEXT_LEN(&inspected), + GRN_TEXT_VALUE(&inspected)); + GRN_OBJ_FIN(ctx, &inspected); + return NULL; + } + + /* TODO: extract as grn_func_arg_int64() */ + if (!grn_type_id_is_number_family(ctx, from_raw->header.domain)) { + grn_obj inspected; + + GRN_TEXT_INIT(&inspected, 0); + grn_inspect(ctx, &inspected, from_raw); + GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT, + "string_substring(): from must be a number: <%.*s>", + (int)GRN_TEXT_LEN(&inspected), + GRN_TEXT_VALUE(&inspected)); + GRN_OBJ_FIN(ctx, &inspected); + return NULL; + } + if (from_raw->header.domain == GRN_DB_INT32) { + from = GRN_INT32_VALUE(from_raw); + } else if (from_raw->header.domain == GRN_DB_INT64) { + from = GRN_INT64_VALUE(from_raw); + } else { + grn_obj buffer; + grn_rc rc; + + GRN_INT64_INIT(&buffer, 0); + rc = grn_obj_cast(ctx, from_raw, &buffer, GRN_FALSE); + if (rc == GRN_SUCCESS) { + from = GRN_INT64_VALUE(&buffer); + } + GRN_OBJ_FIN(ctx, &buffer); + + if (rc != GRN_SUCCESS) { + grn_obj inspected; + + GRN_TEXT_INIT(&inspected, 0); + grn_inspect(ctx, &inspected, from_raw); + GRN_PLUGIN_ERROR(ctx, rc, + "string_substring(): " + "failed to cast from value to number: <%.*s>", + (int)GRN_TEXT_LEN(&inspected), + GRN_TEXT_VALUE(&inspected)); + GRN_OBJ_FIN(ctx, 
&inspected); + return NULL; + } + } + + if (length_raw) { + /* TODO: extract as grn_func_arg_int64() */ + if (!grn_type_id_is_number_family(ctx, length_raw->header.domain)) { + grn_obj inspected; + + GRN_TEXT_INIT(&inspected, 0); + grn_inspect(ctx, &inspected, length_raw); + GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT, + "string_substring(): length must be a number: <%.*s>", + (int)GRN_TEXT_LEN(&inspected), + GRN_TEXT_VALUE(&inspected)); + GRN_OBJ_FIN(ctx, &inspected); + return NULL; + } + if (length_raw->header.domain == GRN_DB_INT32) { + length = GRN_INT32_VALUE(length_raw); + } else if (length_raw->header.domain == GRN_DB_INT64) { + length = GRN_INT64_VALUE(length_raw); + } else { + grn_obj buffer; + grn_rc rc; + + GRN_INT64_INIT(&buffer, 0); + rc = grn_obj_cast(ctx, length_raw, &buffer, GRN_FALSE); + if (rc == GRN_SUCCESS) { + length = GRN_INT64_VALUE(&buffer); + } + GRN_OBJ_FIN(ctx, &buffer); + + if (rc != GRN_SUCCESS) { + grn_obj inspected; + + GRN_TEXT_INIT(&inspected, 0); + grn_inspect(ctx, &inspected, length_raw); + GRN_PLUGIN_ERROR(ctx, rc, + "string_substring(): " + "failed to cast length value to number: <%.*s>", + (int)GRN_TEXT_LEN(&inspected), + GRN_TEXT_VALUE(&inspected)); + GRN_OBJ_FIN(ctx, &inspected); + return NULL; + } + } + } + + substring = grn_plugin_proc_alloc(ctx, user_data, target->header.domain, 0); + if (!substring) { + return NULL; + } + + GRN_BULK_REWIND(substring); + + if (GRN_TEXT_LEN(target) == 0) { + return substring; + } + if (length == 0) { + return substring; + } + + while (from < 0) { + from += GRN_TEXT_LEN(target); + } + + { + const char *p; + + start = NULL; + p = GRN_TEXT_VALUE(target); + end = p + GRN_TEXT_LEN(target); + + if (from == 0) { + start = p; + } else { + unsigned int char_length = 0; + size_t n_chars = 0; + + for (; + p < end && (char_length = grn_charlen(ctx, p, end)); + p += char_length, n_chars++) { + if (n_chars == from) { + start = p; + break; + } + } + } + + if (start && length > 0) { + unsigned int 
char_length = 0; + size_t n_chars = 0; + + for (; + p < end && (char_length = grn_charlen(ctx, p, end)); + p += char_length, n_chars++) { + if (n_chars == length) { + end = p; + break; + } + } + } + } + + if (start) { + GRN_TEXT_SET(ctx, substring, start, end - start); + } + + return substring; +} + +grn_rc +GRN_PLUGIN_INIT(grn_ctx *ctx) +{ + return ctx->rc; +} + +grn_rc +GRN_PLUGIN_REGISTER(grn_ctx *ctx) +{ + grn_rc rc = GRN_SUCCESS; + + grn_proc_create(ctx, "string_length", -1, GRN_PROC_FUNCTION, func_string_length, + NULL, NULL, 0, NULL); + + grn_proc_create(ctx, "string_substring", -1, GRN_PROC_FUNCTION, func_string_substring, + NULL, NULL, 0, NULL); + + return rc; +} + +grn_rc +GRN_PLUGIN_FIN(grn_ctx *ctx) +{ + return GRN_SUCCESS; +} diff --git a/storage/mroonga/vendor/groonga/plugins/functions/string_sources.am b/storage/mroonga/vendor/groonga/plugins/functions/string_sources.am new file mode 100644 index 00000000000..3477e58abe1 --- /dev/null +++ b/storage/mroonga/vendor/groonga/plugins/functions/string_sources.am @@ -0,0 +1,2 @@ +string_la_SOURCES = \ + string.c diff --git a/storage/mroonga/vendor/groonga/plugins/functions/time.c b/storage/mroonga/vendor/groonga/plugins/functions/time.c new file mode 100644 index 00000000000..dc54f47ea52 --- /dev/null +++ b/storage/mroonga/vendor/groonga/plugins/functions/time.c @@ -0,0 +1,376 @@ +/* -*- c-basic-offset: 2 -*- */ +/* + Copyright(C) 2016 Brazil + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License version 2.1 as published by the Free Software Foundation. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +*/ + +#ifdef GRN_EMBEDDED +# define GRN_PLUGIN_FUNCTION_TAG functions_time +#endif + +#include <groonga/plugin.h> + +#include <math.h> + +typedef enum { + GRN_TIME_CLASSIFY_UNIT_SECOND, + GRN_TIME_CLASSIFY_UNIT_MINUTE, + GRN_TIME_CLASSIFY_UNIT_HOUR, + GRN_TIME_CLASSIFY_UNIT_DAY, + GRN_TIME_CLASSIFY_UNIT_WEEK, + GRN_TIME_CLASSIFY_UNIT_MONTH, + GRN_TIME_CLASSIFY_UNIT_YEAR +} grn_time_classify_unit; + +static grn_obj * +func_time_classify_raw(grn_ctx *ctx, + int n_args, + grn_obj **args, + grn_user_data *user_data, + const char *function_name, + grn_time_classify_unit unit) +{ + grn_obj *time; + uint32_t interval_raw = 1; + grn_obj *classed_time; + grn_bool accept_interval = GRN_TRUE; + + switch (unit) { + case GRN_TIME_CLASSIFY_UNIT_SECOND : + case GRN_TIME_CLASSIFY_UNIT_MINUTE : + case GRN_TIME_CLASSIFY_UNIT_HOUR : + accept_interval = GRN_TRUE; + break; + case GRN_TIME_CLASSIFY_UNIT_DAY : + case GRN_TIME_CLASSIFY_UNIT_WEEK : + accept_interval = GRN_FALSE; + break; + case GRN_TIME_CLASSIFY_UNIT_MONTH : + case GRN_TIME_CLASSIFY_UNIT_YEAR : + accept_interval = GRN_TRUE; + break; + } + + if (accept_interval) { + if (!(n_args == 1 || n_args == 2)) { + GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT, + "%s(): " + "wrong number of arguments (%d for 1..2)", + function_name, + n_args); + return NULL; + } + } else { + if (n_args != 1) { + GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT, + "%s(): " + "wrong number of arguments (%d for 1)", + function_name, + n_args); + return NULL; + } + } + + time = args[0]; + if (!(time->header.type == GRN_BULK && + time->header.domain == GRN_DB_TIME)) { + grn_obj inspected; + + GRN_TEXT_INIT(&inspected, 0); + grn_inspect(ctx, &inspected, time); + GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT, + "%s(): " + "the first argument must be a 
time: " + "<%.*s>", + function_name, + (int)GRN_TEXT_LEN(&inspected), + GRN_TEXT_VALUE(&inspected)); + GRN_OBJ_FIN(ctx, &inspected); + return NULL; + } + + if (n_args == 2) { + grn_obj *interval; + grn_obj casted_interval; + + interval = args[1]; + if (!(interval->header.type == GRN_BULK && + grn_type_id_is_number_family(ctx, interval->header.domain))) { + grn_obj inspected; + + GRN_TEXT_INIT(&inspected, 0); + grn_inspect(ctx, &inspected, interval); + GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT, + "%s(): " + "the second argument must be a number: " + "<%.*s>", + function_name, + (int)GRN_TEXT_LEN(&inspected), + GRN_TEXT_VALUE(&inspected)); + GRN_OBJ_FIN(ctx, &inspected); + return NULL; + } + + GRN_VALUE_FIX_SIZE_INIT(&casted_interval, 0, GRN_DB_UINT32); + grn_obj_cast(ctx, interval, &casted_interval, GRN_FALSE); + interval_raw = GRN_UINT32_VALUE(&casted_interval); + GRN_OBJ_FIN(ctx, &casted_interval); + + if (interval_raw == 0) { + grn_obj inspected; + + GRN_TEXT_INIT(&inspected, 0); + grn_inspect(ctx, &inspected, interval); + GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT, + "%s(): " + "the second argument must not be zero: " + "<%.*s>", + function_name, + (int)GRN_TEXT_LEN(&inspected), + GRN_TEXT_VALUE(&inspected)); + GRN_OBJ_FIN(ctx, &inspected); + return NULL; + } + } + + { + int64_t time_raw; + struct tm tm; + int64_t classed_time_raw; + + time_raw = GRN_TIME_VALUE(time); + if (!grn_time_to_tm(ctx, time_raw, &tm)) { + return NULL; + } + + switch (unit) { + case GRN_TIME_CLASSIFY_UNIT_SECOND : + tm.tm_sec = (tm.tm_sec / interval_raw) * interval_raw; + break; + case GRN_TIME_CLASSIFY_UNIT_MINUTE : + tm.tm_min = (tm.tm_min / interval_raw) * interval_raw; + tm.tm_sec = 0; + break; + case GRN_TIME_CLASSIFY_UNIT_HOUR : + tm.tm_hour = (tm.tm_hour / interval_raw) * interval_raw; + tm.tm_min = 0; + tm.tm_sec = 0; + break; + case GRN_TIME_CLASSIFY_UNIT_DAY : + tm.tm_hour = 0; + tm.tm_min = 0; + tm.tm_sec = 0; + break; + case GRN_TIME_CLASSIFY_UNIT_WEEK : + if ((tm.tm_mday - 
tm.tm_wday) >= 0) { + tm.tm_mday -= tm.tm_wday; + } else { + int n_underflowed_mday = -(tm.tm_mday - tm.tm_wday); + int mday; + int max_mday = 31; + + if (tm.tm_mon == 0) { + tm.tm_year--; + tm.tm_mon = 11; + } else { + tm.tm_mon--; + } + + for (mday = max_mday; mday > n_underflowed_mday; mday--) { + int64_t unused; + tm.tm_mday = mday; + if (grn_time_from_tm(ctx, &unused, &tm)) { + break; + } + } + tm.tm_mday -= n_underflowed_mday; + } + tm.tm_hour = 0; + tm.tm_min = 0; + tm.tm_sec = 0; + break; + case GRN_TIME_CLASSIFY_UNIT_MONTH : + tm.tm_mon = (tm.tm_mon / interval_raw) * interval_raw; + tm.tm_mday = 1; + tm.tm_hour = 0; + tm.tm_min = 0; + tm.tm_sec = 0; + break; + case GRN_TIME_CLASSIFY_UNIT_YEAR : + tm.tm_year = (((1900 + tm.tm_year) / interval_raw) * interval_raw) - 1900; + tm.tm_mon = 0; + tm.tm_mday = 1; + tm.tm_hour = 0; + tm.tm_min = 0; + tm.tm_sec = 0; + break; + } + + if (!grn_time_from_tm(ctx, &classed_time_raw, &tm)) { + return NULL; + } + + classed_time = grn_plugin_proc_alloc(ctx, + user_data, + time->header.domain, + 0); + if (!classed_time) { + return NULL; + } + GRN_TIME_SET(ctx, classed_time, classed_time_raw); + + return classed_time; + } +} + +static grn_obj * +func_time_classify_second(grn_ctx *ctx, int n_args, grn_obj **args, + grn_user_data *user_data) +{ + return func_time_classify_raw(ctx, + n_args, + args, + user_data, + "time_classify_second", + GRN_TIME_CLASSIFY_UNIT_SECOND); +} + +static grn_obj * +func_time_classify_minute(grn_ctx *ctx, int n_args, grn_obj **args, + grn_user_data *user_data) +{ + return func_time_classify_raw(ctx, + n_args, + args, + user_data, + "time_classify_minute", + GRN_TIME_CLASSIFY_UNIT_MINUTE); +} + +static grn_obj * +func_time_classify_hour(grn_ctx *ctx, int n_args, grn_obj **args, + grn_user_data *user_data) +{ + return func_time_classify_raw(ctx, + n_args, + args, + user_data, + "time_classify_hour", + GRN_TIME_CLASSIFY_UNIT_HOUR); +} + +static grn_obj * +func_time_classify_day(grn_ctx *ctx, int n_args, 
grn_obj **args, + grn_user_data *user_data) +{ + return func_time_classify_raw(ctx, + n_args, + args, + user_data, + "time_classify_day", + GRN_TIME_CLASSIFY_UNIT_DAY); +} + +static grn_obj * +func_time_classify_week(grn_ctx *ctx, int n_args, grn_obj **args, + grn_user_data *user_data) +{ + return func_time_classify_raw(ctx, + n_args, + args, + user_data, + "time_classify_week", + GRN_TIME_CLASSIFY_UNIT_WEEK); +} + +static grn_obj * +func_time_classify_month(grn_ctx *ctx, int n_args, grn_obj **args, + grn_user_data *user_data) +{ + return func_time_classify_raw(ctx, + n_args, + args, + user_data, + "time_classify_month", + GRN_TIME_CLASSIFY_UNIT_MONTH); +} + +static grn_obj * +func_time_classify_year(grn_ctx *ctx, int n_args, grn_obj **args, + grn_user_data *user_data) +{ + return func_time_classify_raw(ctx, + n_args, + args, + user_data, + "time_classify_year", + GRN_TIME_CLASSIFY_UNIT_YEAR); +} + +grn_rc +GRN_PLUGIN_INIT(grn_ctx *ctx) +{ + return ctx->rc; +} + +grn_rc +GRN_PLUGIN_REGISTER(grn_ctx *ctx) +{ + grn_rc rc = GRN_SUCCESS; + + grn_proc_create(ctx, + "time_classify_second", -1, + GRN_PROC_FUNCTION, + func_time_classify_second, + NULL, NULL, 0, NULL); + grn_proc_create(ctx, + "time_classify_minute", -1, + GRN_PROC_FUNCTION, + func_time_classify_minute, + NULL, NULL, 0, NULL); + grn_proc_create(ctx, + "time_classify_hour", -1, + GRN_PROC_FUNCTION, + func_time_classify_hour, + NULL, NULL, 0, NULL); + grn_proc_create(ctx, + "time_classify_day", -1, + GRN_PROC_FUNCTION, + func_time_classify_day, + NULL, NULL, 0, NULL); + grn_proc_create(ctx, + "time_classify_week", -1, + GRN_PROC_FUNCTION, + func_time_classify_week, + NULL, NULL, 0, NULL); + grn_proc_create(ctx, + "time_classify_month", -1, + GRN_PROC_FUNCTION, + func_time_classify_month, + NULL, NULL, 0, NULL); + grn_proc_create(ctx, + "time_classify_year", -1, + GRN_PROC_FUNCTION, + func_time_classify_year, + NULL, NULL, 0, NULL); + + return rc; +} + +grn_rc +GRN_PLUGIN_FIN(grn_ctx *ctx) +{ + return 
GRN_SUCCESS; +} diff --git a/storage/mroonga/vendor/groonga/plugins/functions/time_sources.am b/storage/mroonga/vendor/groonga/plugins/functions/time_sources.am new file mode 100644 index 00000000000..2c55a570f0b --- /dev/null +++ b/storage/mroonga/vendor/groonga/plugins/functions/time_sources.am @@ -0,0 +1,2 @@ +time_la_SOURCES = \ + time.c diff --git a/storage/mroonga/vendor/groonga/plugins/functions/vector.c b/storage/mroonga/vendor/groonga/plugins/functions/vector.c index a92fee9dbec..a7283fc59eb 100644 --- a/storage/mroonga/vendor/groonga/plugins/functions/vector.c +++ b/storage/mroonga/vendor/groonga/plugins/functions/vector.c @@ -1,6 +1,6 @@ /* -*- c-basic-offset: 2 -*- */ /* - Copyright(C) 2015 Brazil + Copyright(C) 2015-2017 Brazil This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public @@ -70,6 +70,307 @@ func_vector_size(grn_ctx *ctx, int n_args, grn_obj **args, return grn_size; } +static grn_obj * +func_vector_slice(grn_ctx *ctx, int n_args, grn_obj **args, + grn_user_data *user_data) +{ + grn_obj *target; + grn_obj *from_raw = NULL; + grn_obj *length_raw = NULL; + int64_t from = 0; + int64_t length = -1; + uint32_t to = 0; + uint32_t size = 0; + grn_obj *slice; + + if (n_args < 2 || n_args > 3) { + GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT, + "vector_slice(): wrong number of arguments (%d for 2..3)", + n_args); + return NULL; + } + + target = args[0]; + from_raw = args[1]; + if (n_args == 3) { + length_raw = args[2]; + } + switch (target->header.type) { + case GRN_VECTOR : + case GRN_PVECTOR : + case GRN_UVECTOR : + size = grn_vector_size(ctx, target); + break; + default : + { + grn_obj inspected; + + GRN_TEXT_INIT(&inspected, 0); + grn_inspect(ctx, &inspected, target); /* output buffer first, inspected object second */ + GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT, + "vector_slice(): target object must be vector: <%.*s>", + (int)GRN_TEXT_LEN(&inspected), + GRN_TEXT_VALUE(&inspected)); + GRN_OBJ_FIN(ctx, &inspected); + return NULL; + } + 
break; + } + + if (!grn_type_id_is_number_family(ctx, from_raw->header.domain)) { + grn_obj inspected; + + GRN_TEXT_INIT(&inspected, 0); + grn_inspect(ctx, &inspected, from_raw); + GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT, + "vector_slice(): from must be a number: <%.*s>", + (int)GRN_TEXT_LEN(&inspected), + GRN_TEXT_VALUE(&inspected)); + GRN_OBJ_FIN(ctx, &inspected); + return NULL; + } + if (from_raw->header.domain == GRN_DB_INT32) { + from = GRN_INT32_VALUE(from_raw); + } else if (from_raw->header.domain == GRN_DB_INT64) { + from = GRN_INT64_VALUE(from_raw); + } else { + grn_obj buffer; + grn_rc rc; + + GRN_INT64_INIT(&buffer, 0); + rc = grn_obj_cast(ctx, from_raw, &buffer, GRN_FALSE); + if (rc == GRN_SUCCESS) { + from = GRN_INT64_VALUE(&buffer); + } + GRN_OBJ_FIN(ctx, &buffer); + + if (rc != GRN_SUCCESS) { + grn_obj inspected; + + GRN_TEXT_INIT(&inspected, 0); + grn_inspect(ctx, &inspected, from_raw); + GRN_PLUGIN_ERROR(ctx, rc, + "vector_slice(): " + "failed to cast from value to number: <%.*s>", + (int)GRN_TEXT_LEN(&inspected), + GRN_TEXT_VALUE(&inspected)); + GRN_OBJ_FIN(ctx, &inspected); + return NULL; + } + } + + if (length_raw) { + if (!grn_type_id_is_number_family(ctx, length_raw->header.domain)) { + grn_obj inspected; + + GRN_TEXT_INIT(&inspected, 0); + grn_inspect(ctx, &inspected, length_raw); + GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT, + "vector_slice(): length must be a number: <%.*s>", + (int)GRN_TEXT_LEN(&inspected), + GRN_TEXT_VALUE(&inspected)); + GRN_OBJ_FIN(ctx, &inspected); + return NULL; + } + if (length_raw->header.domain == GRN_DB_INT32) { + length = GRN_INT32_VALUE(length_raw); + } else if (length_raw->header.domain == GRN_DB_INT64) { + length = GRN_INT64_VALUE(length_raw); + } else { + grn_obj buffer; + grn_rc rc; + + GRN_INT64_INIT(&buffer, 0); + rc = grn_obj_cast(ctx, length_raw, &buffer, GRN_FALSE); + if (rc == GRN_SUCCESS) { + length = GRN_INT64_VALUE(&buffer); + } + GRN_OBJ_FIN(ctx, &buffer); + + if (rc != GRN_SUCCESS) { + grn_obj 
inspected; + + GRN_TEXT_INIT(&inspected, 0); + grn_inspect(ctx, &inspected, length_raw); + GRN_PLUGIN_ERROR(ctx, rc, + "vector_slice(): " + "failed to cast length value to number: <%.*s>", + (int)GRN_TEXT_LEN(&inspected), + GRN_TEXT_VALUE(&inspected)); + GRN_OBJ_FIN(ctx, &inspected); + return NULL; + } + } + } + + slice = grn_plugin_proc_alloc(ctx, user_data, target->header.domain, GRN_OBJ_VECTOR); + if (!slice) { + return NULL; + } + + if (target->header.flags & GRN_OBJ_WITH_WEIGHT) { + slice->header.flags |= GRN_OBJ_WITH_WEIGHT; + } + + if (length < 0) { + length = size + length + 1; + } + + if (length > size) { + length = size; + } + + if (length <= 0) { + return slice; + } + + while (from < 0) { + from += size; + } + + to = from + length; + if (to > size) { + to = size; + } + + switch (target->header.type) { + case GRN_VECTOR : + { + unsigned int i; + for (i = from; i < to; i++) { + const char *content; + unsigned int content_length; + unsigned int weight; + grn_id domain; + content_length = grn_vector_get_element(ctx, target, i, + &content, &weight, &domain); + grn_vector_add_element(ctx, slice, + content, content_length, weight, domain); + } + } + break; + case GRN_PVECTOR : + { + unsigned int i; + for (i = from; i < to; i++) { + grn_obj *element = GRN_PTR_VALUE_AT(target, i); + GRN_PTR_PUT(ctx, slice, element); + } + } + break; + case GRN_UVECTOR : + { + grn_obj *domain; + + domain = grn_ctx_at(ctx, target->header.domain); + if (grn_obj_is_table(ctx, domain)) { + unsigned int i; + for (i = from; i < to; i++) { + grn_id id; + unsigned int weight; + id = grn_uvector_get_element(ctx, target, i, &weight); + grn_uvector_add_element(ctx, slice, id, weight); + } + } else { +#define PUT_SLICE_VALUES(type) do { \ + unsigned int i; \ + for (i = from; i < to; i++) { \ + GRN_ ## type ## _PUT(ctx, \ + slice, \ + GRN_ ## type ## _VALUE_AT(target, i)); \ + } \ + } while (GRN_FALSE) + switch (target->header.domain) { + case GRN_DB_BOOL : + PUT_SLICE_VALUES(BOOL); + break; + 
case GRN_DB_INT8 : + PUT_SLICE_VALUES(INT8); + break; + case GRN_DB_UINT8 : + PUT_SLICE_VALUES(UINT8); + break; + case GRN_DB_INT16 : + PUT_SLICE_VALUES(INT16); + break; + case GRN_DB_UINT16 : + PUT_SLICE_VALUES(UINT16); + break; + case GRN_DB_INT32 : + PUT_SLICE_VALUES(INT32); + break; + case GRN_DB_UINT32 : + PUT_SLICE_VALUES(UINT32); + break; + case GRN_DB_INT64 : + PUT_SLICE_VALUES(INT64); + break; + case GRN_DB_UINT64 : + PUT_SLICE_VALUES(UINT64); + break; + case GRN_DB_FLOAT : + PUT_SLICE_VALUES(FLOAT); + break; + case GRN_DB_TIME : + PUT_SLICE_VALUES(TIME); + break; + } + } + } + break; +#undef PUT_SLICE_VALUES + } + + return slice; +} + +static grn_obj * +func_vector_new(grn_ctx *ctx, int n_args, grn_obj **args, + grn_user_data *user_data) +{ + grn_obj *vector = NULL; + int i; + + if (n_args == 0) { + return grn_plugin_proc_alloc(ctx, user_data, GRN_DB_UINT32, GRN_OBJ_VECTOR); + } + + vector = grn_plugin_proc_alloc(ctx, + user_data, + args[0]->header.domain, + GRN_OBJ_VECTOR); + if (!vector) { + return NULL; + } + + for (i = 0; i < n_args; i++) { + grn_obj *element = args[i]; + switch (vector->header.type) { + case GRN_VECTOR : + grn_vector_add_element(ctx, + vector, + GRN_BULK_HEAD(element), + GRN_BULK_VSIZE(element), + 0, + element->header.domain); + break; + case GRN_UVECTOR : + grn_bulk_write(ctx, + vector, + GRN_BULK_HEAD(element), + GRN_BULK_VSIZE(element)); + break; + case GRN_PVECTOR : + GRN_PTR_PUT(ctx, vector, element); + break; + default : + break; + } + } + + return vector; +} + grn_rc GRN_PLUGIN_INIT(grn_ctx *ctx) { @@ -84,6 +385,12 @@ GRN_PLUGIN_REGISTER(grn_ctx *ctx) grn_proc_create(ctx, "vector_size", -1, GRN_PROC_FUNCTION, func_vector_size, NULL, NULL, 0, NULL); + grn_proc_create(ctx, "vector_slice", -1, GRN_PROC_FUNCTION, func_vector_slice, + NULL, NULL, 0, NULL); + + grn_proc_create(ctx, "vector_new", -1, GRN_PROC_FUNCTION, func_vector_new, + NULL, NULL, 0, NULL); + return rc; } diff --git 
a/storage/mroonga/vendor/groonga/plugins/query_expanders/Makefile.am b/storage/mroonga/vendor/groonga/plugins/query_expanders/Makefile.am index ca26760d332..96c0911a070 100644 --- a/storage/mroonga/vendor/groonga/plugins/query_expanders/Makefile.am +++ b/storage/mroonga/vendor/groonga/plugins/query_expanders/Makefile.am @@ -14,7 +14,7 @@ AM_LDFLAGS = \ LIBS = \ $(top_builddir)/lib/libgroonga.la -query_expanders_plugins_LTLIBRARIES = -query_expanders_plugins_LTLIBRARIES += tsv.la +query_expander_plugins_LTLIBRARIES = +query_expander_plugins_LTLIBRARIES += tsv.la include tsv_sources.am diff --git a/storage/mroonga/vendor/groonga/plugins/query_expanders/tsv.c b/storage/mroonga/vendor/groonga/plugins/query_expanders/tsv.c index 00550f6822b..2b0a5ba244e 100644 --- a/storage/mroonga/vendor/groonga/plugins/query_expanders/tsv.c +++ b/storage/mroonga/vendor/groonga/plugins/query_expanders/tsv.c @@ -19,27 +19,20 @@ # define GRN_PLUGIN_FUNCTION_TAG query_expanders_tsv #endif -/* groonga's internal headers */ -/* for grn_text_fgets(): We don't want to require stdio.h for groonga.h. - What should we do? Should we split header file such as groonga/stdio.h? 
*/ -#include <grn_str.h> +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif /* HAVE_CONFIG_H */ #include <groonga/plugin.h> -#include <stdio.h> +#include <stdlib.h> #include <string.h> #ifdef WIN32 +# include <windows.h> # include <share.h> #endif /* WIN32 */ -#ifdef HAVE__STRNICMP -# ifdef strncasecmp -# undef strncasecmp -# endif /* strncasecmp */ -# define strncasecmp(s1,s2,n) _strnicmp(s1,s2,n) -#endif /* HAVE__STRNICMP */ - #define MAX_SYNONYM_BYTES 4096 static grn_hash *synonyms = NULL; @@ -54,7 +47,7 @@ get_system_synonyms_file(void) const char *relative_path = GRN_QUERY_EXPANDER_TSV_RELATIVE_SYNONYMS_FILE; size_t base_dir_length; - base_dir = grn_plugin_win32_base_dir(); + base_dir = grn_plugin_windows_base_dir(); base_dir_length = strlen(base_dir); grn_strcpy(win32_synonyms_file, MAX_PATH, base_dir); grn_strcat(win32_synonyms_file, MAX_PATH, "/"); @@ -71,13 +64,13 @@ get_system_synonyms_file(void) } #endif /* WIN32 */ -static inline grn_bool +static grn_bool is_comment_mark(char character) { return character == '#'; } -static inline grn_encoding +static grn_encoding detect_coding_part(grn_ctx *ctx, const char *line, size_t line_length) { grn_encoding encoding = GRN_ENC_NONE; @@ -95,19 +88,19 @@ detect_coding_part(grn_ctx *ctx, const char *line, size_t line_length) coding_part = strstr(c_line, coding_part_keyword); if (coding_part) { encoding_name = coding_part + strlen(coding_part_keyword); - if (strncasecmp(encoding_name, "utf-8", strlen("utf-8")) == 0 || - strncasecmp(encoding_name, "utf8", strlen("utf8")) == 0) { + if (grn_strncasecmp(encoding_name, "utf-8", strlen("utf-8")) == 0 || + grn_strncasecmp(encoding_name, "utf8", strlen("utf8")) == 0) { encoding = GRN_ENC_UTF8; - } else if (strncasecmp(encoding_name, "sjis", strlen("sjis")) == 0 || - strncasecmp(encoding_name, "Shift_JIS", strlen("Shift_JIS")) == 0) { + } else if (grn_strncasecmp(encoding_name, "sjis", strlen("sjis")) == 0 || + grn_strncasecmp(encoding_name, "Shift_JIS", strlen("Shift_JIS")) 
== 0) { encoding = GRN_ENC_SJIS; - } else if (strncasecmp(encoding_name, "EUC-JP", strlen("EUC-JP")) == 0 || - strncasecmp(encoding_name, "euc_jp", strlen("euc_jp")) == 0) { + } else if (grn_strncasecmp(encoding_name, "EUC-JP", strlen("EUC-JP")) == 0 || + grn_strncasecmp(encoding_name, "euc_jp", strlen("euc_jp")) == 0) { encoding = GRN_ENC_EUC_JP; - } else if (strncasecmp(encoding_name, "latin1", strlen("latin1")) == 0) { + } else if (grn_strncasecmp(encoding_name, "latin1", strlen("latin1")) == 0) { encoding = GRN_ENC_LATIN1; - } else if (strncasecmp(encoding_name, "KOI8-R", strlen("KOI8-R")) == 0 || - strncasecmp(encoding_name, "koi8r", strlen("koi8r")) == 0) { + } else if (grn_strncasecmp(encoding_name, "KOI8-R", strlen("KOI8-R")) == 0 || + grn_strncasecmp(encoding_name, "koi8r", strlen("koi8r")) == 0) { encoding = GRN_ENC_KOI8R; } } else { @@ -118,7 +111,7 @@ detect_coding_part(grn_ctx *ctx, const char *line, size_t line_length) return encoding; } -static inline grn_encoding +static grn_encoding guess_encoding(grn_ctx *ctx, const char **line, size_t *line_length) { const char bom[] = {0xef, 0xbb, 0xbf}; @@ -138,7 +131,7 @@ guess_encoding(grn_ctx *ctx, const char **line, size_t *line_length) } static void -parse_synonyms_file_line(grn_ctx *ctx, const char *line, int line_length, +parse_synonyms_file_line(grn_ctx *ctx, const char *line, size_t line_length, grn_obj *key, grn_obj *value) { size_t i = 0; @@ -201,7 +194,7 @@ load_synonyms(grn_ctx *ctx) { static char path_env[GRN_ENV_BUFFER_SIZE]; const char *path; - FILE *file; + grn_file_reader *file_reader; int number_of_lines; grn_encoding encoding; grn_obj line, key, value; @@ -214,8 +207,8 @@ load_synonyms(grn_ctx *ctx) } else { path = get_system_synonyms_file(); } - file = grn_fopen(path, "r"); - if (!file) { + file_reader = grn_file_reader_open(ctx, path); + if (!file_reader) { GRN_LOG(ctx, GRN_LOG_WARNING, "[plugin][query-expander][tsv] " "synonyms file doesn't exist: <%s>", @@ -228,7 +221,7 @@ 
load_synonyms(grn_ctx *ctx) GRN_TEXT_INIT(&value, 0); grn_bulk_reserve(ctx, &value, MAX_SYNONYM_BYTES); number_of_lines = 0; - while (grn_text_fgets(ctx, &line, file) == GRN_SUCCESS) { + while (grn_file_reader_read_line(ctx, file_reader, &line) == GRN_SUCCESS) { const char *line_value = GRN_TEXT_VALUE(&line); size_t line_length = GRN_TEXT_LEN(&line); @@ -252,7 +245,7 @@ load_synonyms(grn_ctx *ctx) GRN_OBJ_FIN(ctx, &key); GRN_OBJ_FIN(ctx, &value); - fclose(file); + grn_file_reader_close(ctx, file_reader); } static grn_obj * diff --git a/storage/mroonga/vendor/groonga/plugins/ruby/CMakeLists.txt b/storage/mroonga/vendor/groonga/plugins/ruby/CMakeLists.txt index 63a79938552..7d34be18546 100644 --- a/storage/mroonga/vendor/groonga/plugins/ruby/CMakeLists.txt +++ b/storage/mroonga/vendor/groonga/plugins/ruby/CMakeLists.txt @@ -1,4 +1,4 @@ -# Copyright(C) 2013 Brazil +# Copyright(C) 2013-2016 Brazil # # This library is free software; you can redistribute it and/or # modify it under the terms of the GNU Lesser General Public @@ -13,48 +13,12 @@ # License along with this library; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA -include_directories( - ${CMAKE_CURRENT_SOURCE_DIR}/../../lib - ${MRUBY_INCLUDE_DIRS}) +if(NOT GRN_EMBED) + if(GRN_WITH_MRUBY) + set(GRN_RELATIVE_RUBY_PLUGINS_DIR "${GRN_RELATIVE_PLUGINS_DIR}/ruby") -if(GRN_WITH_MRUBY) - set(GRN_RELATIVE_RUBY_PLUGINS_DIR "${GRN_RELATIVE_PLUGINS_DIR}/ruby") - - read_file_list(${CMAKE_CURRENT_SOURCE_DIR}/eval_sources.am RUBY_EVAL_SOURCES) - set_source_files_properties(${RUBY_EVAL_SOURCES} - PROPERTIES - COMPILE_FLAGS "${GRN_C_COMPILE_FLAGS}") - if(GRN_EMBED) - add_library(ruby_eval STATIC ${RUBY_EVAL_SOURCES}) - set_target_properties( - ruby_eval - PROPERTIES - POSITION_INDEPENDENT_CODE ON) - else() - add_library(ruby_eval MODULE ${RUBY_EVAL_SOURCES}) - set_target_properties(ruby_eval PROPERTIES - PREFIX "" - OUTPUT_NAME "eval") - install(TARGETS 
ruby_eval DESTINATION "${GRN_RELATIVE_RUBY_PLUGINS_DIR}") - endif() - target_link_libraries(ruby_eval libgroonga) - - read_file_list(${CMAKE_CURRENT_SOURCE_DIR}/load_sources.am RUBY_LOAD_SOURCES) - set_source_files_properties(${RUBY_LOAD_SOURCES} - PROPERTIES - COMPILE_FLAGS "${GRN_C_COMPILE_FLAGS}") - if(GRN_EMBED) - add_library(ruby_load STATIC ${RUBY_LOAD_SOURCES}) - set_target_properties( - ruby_load - PROPERTIES - POSITION_INDEPENDENT_CODE ON) - else() - add_library(ruby_load MODULE ${RUBY_LOAD_SOURCES}) - set_target_properties(ruby_load PROPERTIES - PREFIX "" - OUTPUT_NAME "load") - install(TARGETS ruby_load DESTINATION "${GRN_RELATIVE_RUBY_PLUGINS_DIR}") + read_file_list(${CMAKE_CURRENT_SOURCE_DIR}/sources.am RUBY_SCRIPTS) + install(FILES ${RUBY_SCRIPTS} + DESTINATION "${GRN_RELATIVE_RUBY_PLUGINS_DIR}") endif() - target_link_libraries(ruby_load libgroonga) endif() diff --git a/storage/mroonga/vendor/groonga/plugins/ruby/Makefile.am b/storage/mroonga/vendor/groonga/plugins/ruby/Makefile.am index 381fb47160b..a4949727d1a 100644 --- a/storage/mroonga/vendor/groonga/plugins/ruby/Makefile.am +++ b/storage/mroonga/vendor/groonga/plugins/ruby/Makefile.am @@ -1,29 +1,9 @@ EXTRA_DIST = \ CMakeLists.txt -AM_CFLAGS = \ - $(MESSAGE_PACK_CFLAGS) \ - $(MRUBY_CFLAGS) - -AM_CPPFLAGS = \ - -I$(top_builddir) \ - -I$(top_srcdir)/include \ - -I$(top_srcdir)/lib - -AM_LDFLAGS = \ - -avoid-version \ - -module \ - -no-undefined - -LIBS = \ - $(top_builddir)/lib/libgroonga.la \ - $(MESSAGE_PACK_LIBS) - if WITH_MRUBY -ruby_plugins_LTLIBRARIES = \ - eval.la \ - load.la +dist_ruby_plugins_DATA = \ + $(ruby_scripts) endif -include eval_sources.am -include load_sources.am +include sources.am diff --git a/storage/mroonga/vendor/groonga/plugins/ruby/eval.c b/storage/mroonga/vendor/groonga/plugins/ruby/eval.c deleted file mode 100644 index bacd9011c12..00000000000 --- a/storage/mroonga/vendor/groonga/plugins/ruby/eval.c +++ /dev/null @@ -1,68 +0,0 @@ -/* -*- c-basic-offset: 2; 
indent-tabs-mode: nil -*- */ -/* - Copyright(C) 2013 Brazil - - This library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License version 2.1 as published by the Free Software Foundation. - - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with this library; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA -*/ - -#ifdef GRN_EMBEDDED -# define GRN_PLUGIN_FUNCTION_TAG ruby_eval -#endif - -#include "ruby_plugin.h" - -static grn_obj * -command_ruby_eval(grn_ctx *ctx, int nargs, grn_obj **args, - grn_user_data *user_data) -{ - mrb_state *mrb = ctx->impl->mrb.state; - grn_obj *script; - mrb_value result; - - script = VAR(0); - switch (script->header.domain) { - case GRN_DB_SHORT_TEXT : - case GRN_DB_TEXT : - case GRN_DB_LONG_TEXT : - break; - default : - { - grn_obj inspected; - GRN_TEXT_INIT(&inspected, 0); - grn_inspect(ctx, &inspected, script); - ERR(GRN_INVALID_ARGUMENT, "script must be a string: <%.*s>", - (int)GRN_TEXT_LEN(&inspected), GRN_TEXT_VALUE(&inspected)); - GRN_OBJ_FIN(ctx, &inspected); - return NULL; - } - break; - } - - mrb->exc = NULL; - result = grn_mrb_eval(ctx, GRN_TEXT_VALUE(script), GRN_TEXT_LEN(script)); - output_result(ctx, result); - - return NULL; -} - -grn_rc -GRN_PLUGIN_REGISTER(grn_ctx *ctx) -{ - grn_expr_var vars[1]; - - grn_plugin_expr_var_init(ctx, &vars[0], "script", -1); - grn_plugin_command_create(ctx, "ruby_eval", -1, command_ruby_eval, 1, vars); - - return ctx->rc; -} diff --git a/storage/mroonga/vendor/groonga/plugins/ruby/eval.rb b/storage/mroonga/vendor/groonga/plugins/ruby/eval.rb new file mode 100644 index 
00000000000..e7619cf2a9f --- /dev/null +++ b/storage/mroonga/vendor/groonga/plugins/ruby/eval.rb @@ -0,0 +1,36 @@ +module Groonga + module Ruby + class EvalCommand < Command + register("ruby_eval", + [ + "script", + ]) + + def run_body(input) + script = input[:script] + unless script.is_a?(String) + message = "script must be a string: <#{script.inspect}>" + raise Groonga::InvalidArgument, message + end + + eval_context = EvalContext.new + begin + result = eval_context.eval(script) + rescue Exception => error + writer.map("result", 1) do + writer.write("exception") + writer.map("exception", 1) do + writer.write("message") + writer.write(error.message) + end + end + else + writer.map("result", 1) do + writer.write("value") + writer.write(result) + end + end + end + end + end +end diff --git a/storage/mroonga/vendor/groonga/plugins/ruby/eval_sources.am b/storage/mroonga/vendor/groonga/plugins/ruby/eval_sources.am deleted file mode 100644 index 08543e43fb4..00000000000 --- a/storage/mroonga/vendor/groonga/plugins/ruby/eval_sources.am +++ /dev/null @@ -1,3 +0,0 @@ -eval_la_SOURCES = \ - ruby_plugin.h \ - eval.c diff --git a/storage/mroonga/vendor/groonga/plugins/ruby/load.c b/storage/mroonga/vendor/groonga/plugins/ruby/load.c deleted file mode 100644 index 447882319a5..00000000000 --- a/storage/mroonga/vendor/groonga/plugins/ruby/load.c +++ /dev/null @@ -1,67 +0,0 @@ -/* -*- c-basic-offset: 2; indent-tabs-mode: nil -*- */ -/* - Copyright(C) 2013 Brazil - - This library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License version 2.1 as published by the Free Software Foundation. - - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. 
- - You should have received a copy of the GNU Lesser General Public - License along with this library; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA -*/ - -#ifdef GRN_EMBEDDED -# define GRN_PLUGIN_FUNCTION_TAG ruby_load -#endif - -#include "ruby_plugin.h" - -static grn_obj * -command_ruby_load(grn_ctx *ctx, int nargs, grn_obj **args, - grn_user_data *user_data) -{ - grn_obj *path; - mrb_value result; - - path = VAR(0); - switch (path->header.domain) { - case GRN_DB_SHORT_TEXT : - case GRN_DB_TEXT : - case GRN_DB_LONG_TEXT : - break; - default : - { - grn_obj inspected; - GRN_TEXT_INIT(&inspected, 0); - grn_inspect(ctx, &inspected, path); - ERR(GRN_INVALID_ARGUMENT, "path must be a string: <%.*s>", - (int)GRN_TEXT_LEN(&inspected), GRN_TEXT_VALUE(&inspected)); - GRN_OBJ_FIN(ctx, &inspected); - return NULL; - } - break; - } - - GRN_TEXT_PUTC(ctx, path, '\0'); - result = grn_mrb_load(ctx, GRN_TEXT_VALUE(path)); - output_result(ctx, result); - - return NULL; -} - -grn_rc -GRN_PLUGIN_REGISTER(grn_ctx *ctx) -{ - grn_expr_var vars[1]; - - grn_plugin_expr_var_init(ctx, &vars[0], "path", -1); - grn_plugin_command_create(ctx, "ruby_load", -1, command_ruby_load, 1, vars); - - return ctx->rc; -} diff --git a/storage/mroonga/vendor/groonga/plugins/ruby/load_sources.am b/storage/mroonga/vendor/groonga/plugins/ruby/load_sources.am deleted file mode 100644 index d1cce258caa..00000000000 --- a/storage/mroonga/vendor/groonga/plugins/ruby/load_sources.am +++ /dev/null @@ -1,3 +0,0 @@ -load_la_SOURCES = \ - ruby_plugin.h \ - load.c diff --git a/storage/mroonga/vendor/groonga/plugins/ruby/ruby_plugin.h b/storage/mroonga/vendor/groonga/plugins/ruby/ruby_plugin.h deleted file mode 100644 index 57cab2885b1..00000000000 --- a/storage/mroonga/vendor/groonga/plugins/ruby/ruby_plugin.h +++ /dev/null @@ -1,76 +0,0 @@ -/* -*- c-basic-offset: 2; indent-tabs-mode: nil -*- */ -/* - Copyright(C) 2013 Brazil - - This library is free 
software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License version 2.1 as published by the Free Software Foundation. - - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with this library; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA -*/ - -#include <grn_mrb.h> -#include <grn_output.h> -#include <grn_db.h> -#include <grn_ctx_impl.h> -#include <grn_util.h> - -#include <groonga/plugin.h> - -#include <mruby.h> - -#define VAR GRN_PROC_GET_VAR_BY_OFFSET - -static void -output_result(grn_ctx *ctx, mrb_value result) -{ - mrb_state *mrb = ctx->impl->mrb.state; - - GRN_OUTPUT_MAP_OPEN("result", 1); - if (mrb->exc) { - mrb_value mrb_message; - grn_obj grn_message; - GRN_OUTPUT_CSTR("exception"); - GRN_OUTPUT_MAP_OPEN("exception", 1); - GRN_OUTPUT_CSTR("message"); - mrb_message = mrb_funcall(mrb, mrb_obj_value(mrb->exc), "message", 0); - GRN_VOID_INIT(&grn_message); - if (grn_mrb_to_grn(ctx, mrb_message, &grn_message) == GRN_SUCCESS) { - GRN_OUTPUT_OBJ(&grn_message, NULL); - } else { - GRN_OUTPUT_CSTR("unsupported message type"); - } - grn_obj_unlink(ctx, &grn_message); - GRN_OUTPUT_MAP_CLOSE(); - } else { - grn_obj grn_result; - GRN_OUTPUT_CSTR("value"); - GRN_VOID_INIT(&grn_result); - if (grn_mrb_to_grn(ctx, result, &grn_result) == GRN_SUCCESS) { - GRN_OUTPUT_OBJ(&grn_result, NULL); - } else { - GRN_OUTPUT_CSTR("unsupported return value"); - } - grn_obj_unlink(ctx, &grn_result); - } - GRN_OUTPUT_MAP_CLOSE(); -} - -grn_rc -GRN_PLUGIN_INIT(grn_ctx *ctx) -{ - return GRN_SUCCESS; -} - -grn_rc -GRN_PLUGIN_FIN(grn_ctx *ctx) -{ - return GRN_SUCCESS; -} diff --git 
a/storage/mroonga/vendor/groonga/plugins/ruby/sources.am b/storage/mroonga/vendor/groonga/plugins/ruby/sources.am new file mode 100644 index 00000000000..f8938291af8 --- /dev/null +++ b/storage/mroonga/vendor/groonga/plugins/ruby/sources.am @@ -0,0 +1,2 @@ +ruby_scripts = \ + eval.rb diff --git a/storage/mroonga/vendor/groonga/plugins/sharding.rb b/storage/mroonga/vendor/groonga/plugins/sharding.rb index 4ebc9baf438..86401c1fa4d 100644 --- a/storage/mroonga/vendor/groonga/plugins/sharding.rb +++ b/storage/mroonga/vendor/groonga/plugins/sharding.rb @@ -1,6 +1,11 @@ +require "sharding/parameters" require "sharding/range_expression_builder" require "sharding/logical_enumerator" + +require "sharding/logical_parameters" + require "sharding/logical_count" require "sharding/logical_range_filter" require "sharding/logical_select" +require "sharding/logical_shard_list" require "sharding/logical_table_remove" diff --git a/storage/mroonga/vendor/groonga/plugins/sharding/logical_count.rb b/storage/mroonga/vendor/groonga/plugins/sharding/logical_count.rb index 610dae834d3..8bdd77ef79f 100644 --- a/storage/mroonga/vendor/groonga/plugins/sharding/logical_count.rb +++ b/storage/mroonga/vendor/groonga/plugins/sharding/logical_count.rb @@ -17,25 +17,59 @@ module Groonga filter = input[:filter] total = 0 - enumerator.each do |table, shard_key, shard_range| - total += count_n_records(table, filter, - shard_key, shard_range, + enumerator.each do |shard, shard_range| + total += count_n_records(filter, shard, shard_range, enumerator.target_range) end writer.write(total) end private - def count_n_records(table, filter, - shard_key, shard_range, - target_range) + def cache_key(input) + key = "logical_count\0" + key << "#{input[:logical_table]}\0" + key << "#{input[:shard_key]}\0" + key << "#{input[:min]}\0" + key << "#{input[:min_border]}\0" + key << "#{input[:max]}\0" + key << "#{input[:max_border]}\0" + key << "#{input[:filter]}\0" + key + end + + def log_use_range_index(use, table_name, 
line, method) + message = "[logical_count]" + if use + message << "[range-index]" + else + message << "[select]" + end + message << " <#{table_name}>" + Context.instance.logger.log(Logger::Level::DEBUG, + __FILE__, + line, + method.to_s, + message) + end + + def count_n_records(filter, shard, shard_range, target_range) cover_type = target_range.cover_type(shard_range) return 0 if cover_type == :none + shard_key = shard.key + if shard_key.nil? + message = "[logical_count] shard_key doesn't exist: " + + "<#{shard.key_name}>" + raise InvalidArgument, message + end + table = shard.table + table_name = shard.table_name + expression_builder = RangeExpressionBuilder.new(shard_key, - target_range, - filter) + target_range) + expression_builder.filter = filter if cover_type == :all + log_use_range_index(false, table_name, __LINE__, __method__) if filter.nil? return table.size else @@ -53,6 +87,9 @@ module Groonga end end + use_range_index = (!range_index.nil?) + log_use_range_index(use_range_index, table_name, __LINE__, __method__) + case cover_type when :partial_min if range_index diff --git a/storage/mroonga/vendor/groonga/plugins/sharding/logical_enumerator.rb b/storage/mroonga/vendor/groonga/plugins/sharding/logical_enumerator.rb index 35934182591..d05a220fc23 100644 --- a/storage/mroonga/vendor/groonga/plugins/sharding/logical_enumerator.rb +++ b/storage/mroonga/vendor/groonga/plugins/sharding/logical_enumerator.rb @@ -1,12 +1,15 @@ module Groonga module Sharding class LogicalEnumerator + include Enumerable + attr_reader :target_range attr_reader :logical_table attr_reader :shard_key_name - def initialize(command_name, input) + def initialize(command_name, input, options={}) @command_name = command_name @input = input + @options = options initialize_parameters end @@ -22,7 +25,6 @@ module Groonga def each_internal(order) context = Context.instance each_shard_with_around(order) do |prev_shard, current_shard, next_shard| - table = current_shard.table shard_range_data = 
current_shard.range_data shard_range = nil @@ -52,16 +54,7 @@ module Groonga shard_range_data.day) end - physical_shard_key_name = "#{table.name}.#{@shard_key_name}" - shard_key = context[physical_shard_key_name] - if shard_key.nil? - message = - "[#{@command_name}] shard_key doesn't exist: " + - "<#{physical_shard_key_name}>" - raise InvalidArgument, message - end - - yield(table, shard_key, shard_range) + yield(current_shard, shard_range) end end @@ -70,10 +63,10 @@ module Groonga prefix = "#{@logical_table}_" shards = [nil] - context.database.each_table(:prefix => prefix, - :order_by => :key, - :order => order) do |table| - shard_range_raw = table.name[prefix.size..-1] + context.database.each_name(:prefix => prefix, + :order_by => :key, + :order => order) do |name| + shard_range_raw = name[prefix.size..-1] case shard_range_raw when /\A(\d{4})(\d{2})\z/ @@ -84,7 +77,7 @@ module Groonga next end - shards << Shard.new(table, shard_range_data) + shards << Shard.new(name, @shard_key_name, shard_range_data) next if shards.size < 3 yield(*shards) shards.shift @@ -104,7 +97,11 @@ module Groonga @shard_key_name = @input[:shard_key] if @shard_key_name.nil? - raise InvalidArgument, "[#{@command_name}] shard_key is missing" + require_shard_key = @options[:require_shard_key] + require_shard_key = true if require_shard_key.nil? 
+ if require_shard_key + raise InvalidArgument, "[#{@command_name}] shard_key is missing" + end end @target_range = TargetRange.new(@command_name, @input) @@ -119,11 +116,24 @@ module Groonga end class Shard - attr_reader :table, :range_data - def initialize(table, range_data) - @table = table + attr_reader :table_name, :key_name, :range_data + def initialize(table_name, key_name, range_data) + @table_name = table_name + @key_name = key_name @range_data = range_data end + + def table + @table ||= Context.instance[@table_name] + end + + def full_key_name + "#{@table_name}.#{@key_name}" + end + + def key + @key ||= Context.instance[full_key_name] + end end class ShardRangeData @@ -133,6 +143,14 @@ module Groonga @month = month @day = day end + + def to_suffix + if @day.nil? + "_%04d%02d" % [@year, @month] + else + "_%04d%02d%02d" % [@year, @month, @day] + end + end end class DayShardRange @@ -144,7 +162,11 @@ module Groonga end def least_over_time - Time.local(@year, @month, @day + 1) + next_day = Time.local(@year, @month, @day) + (60 * 60 * 24) + while next_day.day == @day # For leap second + next_day += 1 + end + next_day end def min_time @@ -168,9 +190,13 @@ module Groonga def least_over_time if @max_day.nil? 
- Time.local(@year, @month + 1, 1) + if @month == 12 + Time.local(@year + 1, 1, 1) + else + Time.local(@year, @month + 1, 1) + end else - Time.local(@year, @month, @max_day + 1) + Time.local(@year, @month, @max_day) end end @@ -270,10 +296,7 @@ module Groonga return true if @min_border == :exclude - not (@min.hour == 0 and - @min.min == 0 and - @min.sec == 0 and - @min.usec == 0) + shard_range.min_time != @min end def in_max?(shard_range) diff --git a/storage/mroonga/vendor/groonga/plugins/sharding/logical_parameters.rb b/storage/mroonga/vendor/groonga/plugins/sharding/logical_parameters.rb new file mode 100644 index 00000000000..75ff569bf7c --- /dev/null +++ b/storage/mroonga/vendor/groonga/plugins/sharding/logical_parameters.rb @@ -0,0 +1,44 @@ +module Groonga + module Sharding + class LogicalParametersCommand < Command + register("logical_parameters", + [ + "range_index", + ]) + + def run_body(input) + range_index = parse_range_index(input[:range_index]) + + parameters = [ + :range_index, + ] + writer.map("parameters", parameters.size) do + parameters.each do |name| + writer.write(name.to_s) + writer.write(Parameters.__send__(name)) + end + end + + Parameters.range_index = range_index if range_index + end + + private + def parse_range_index(value) + case value + when nil + nil + when "auto" + :auto + when "always" + :always + when "never" + :never + else + message = "[logical_parameters][range_index] " + message << "must be auto, always or never: <#{value}>" + raise InvalidArgument, message + end + end + end + end +end diff --git a/storage/mroonga/vendor/groonga/plugins/sharding/logical_range_filter.rb b/storage/mroonga/vendor/groonga/plugins/sharding/logical_range_filter.rb index 94ae8600d2b..1c8f8644695 100644 --- a/storage/mroonga/vendor/groonga/plugins/sharding/logical_range_filter.rb +++ b/storage/mroonga/vendor/groonga/plugins/sharding/logical_range_filter.rb @@ -14,6 +14,7 @@ module Groonga "offset", "limit", "output_columns", + "use_range_index", ]) def 
run_body(input) @@ -55,7 +56,26 @@ module Groonga end end + private + def cache_key(input) + key = "logical_range_filter\0" + key << "#{input[:logical_table]}\0" + key << "#{input[:shard_key]}\0" + key << "#{input[:min]}\0" + key << "#{input[:min_border]}\0" + key << "#{input[:max]}\0" + key << "#{input[:max_border]}\0" + key << "#{input[:order]}\0" + key << "#{input[:filter]}\0" + key << "#{input[:offset]}\0" + key << "#{input[:limit]}\0" + key << "#{input[:output_columns]}\0" + key << "#{input[:use_range_index]}\0" + key + end + class ExecuteContext + attr_reader :use_range_index attr_reader :enumerator attr_reader :order attr_reader :filter @@ -68,6 +88,7 @@ module Groonga attr_reader :threshold def initialize(input) @input = input + @use_range_index = parse_use_range_index(@input[:use_range_index]) @enumerator = LogicalEnumerator.new("logical_range_filter", @input) @order = parse_order(@input, :order) @filter = @input[:filter] @@ -93,6 +114,17 @@ module Groonga end private + def parse_use_range_index(use_range_index) + case use_range_index + when "yes" + true + when "no" + false + else + nil + end + end + def parse_order(input, name) order = input[name] return :ascending if order.nil? @@ -123,23 +155,21 @@ module Groonga end def execute - first_table = nil + first_shard = nil enumerator = @context.enumerator + target_range = enumerator.target_range if @context.order == :descending each_method = :reverse_each else each_method = :each end - enumerator.send(each_method) do |table, shard_key, shard_range| - first_table ||= table - next if table.empty? - - shard_executor = ShardExecutor.new(@context, - table, shard_key, shard_range) + enumerator.send(each_method) do |shard, shard_range| + first_shard ||= shard + shard_executor = ShardExecutor.new(@context, shard, shard_range) shard_executor.execute break if @context.current_limit == 0 end - if first_table.nil? + if first_shard.nil? 
message = "[logical_range_filter] no shard exists: " + "logical_table: <#{enumerator.logical_table}>: " + @@ -148,17 +178,16 @@ module Groonga end if @context.result_sets.empty? result_set = HashTable.create(:flags => ObjectFlags::WITH_SUBREC, - :key_type => first_table) + :key_type => first_shard.table) @context.result_sets << result_set end end end class ShardExecutor - def initialize(context, table, shard_key, shard_range) + def initialize(context, shard, shard_range) @context = context - @table = table - @shard_key = shard_key + @shard = shard @shard_range = shard_range @filter = @context.filter @@ -168,137 +197,279 @@ module Groonga @target_range = @context.enumerator.target_range @cover_type = @target_range.cover_type(@shard_range) - - @expression_builder = RangeExpressionBuilder.new(@shard_key, - @target_range, - @filter) end def execute return if @cover_type == :none + return if @shard.table.empty? + + shard_key = @shard.key + if shard_key.nil? + message = "[logical_range_filter] shard_key doesn't exist: " + + "<#{@shard.key_name}>" + raise InvalidArgument, message + end + + expression_builder = RangeExpressionBuilder.new(shard_key, + @target_range) + expression_builder.filter = @filter - index_info = @shard_key.find_index(Operator::LESS) + index_info = shard_key.find_index(Operator::LESS) if index_info range_index = index_info.index - range_index = nil unless use_range_index?(range_index) + unless use_range_index?(range_index, expression_builder) + range_index = nil + end else range_index = nil end - case @cover_type - when :all - filter_shard_all(range_index) - when :partial_min - if range_index - filter_by_range(range_index, - @target_range.min, @target_range.min_border, - nil, nil) - else - filter_table do |expression| - @expression_builder.build_partial_min(expression) - end - end - when :partial_max - if range_index - filter_by_range(range_index, - nil, nil, - @target_range.max, @target_range.max_border) - else - filter_table do |expression| - 
@expression_builder.build_partial_max(expression) - end - end - when :partial_min_and_max - if range_index - filter_by_range(range_index, - @target_range.min, @target_range.min_border, - @target_range.max, @target_range.max_border) - else - filter_table do |expression| - @expression_builder.build_partial_min_and_max(expression) - end - end - end + execute_filter(range_index, expression_builder) end private - def use_range_index?(range_index) + def decide_use_range_index(use, reason, line, method) + message = "[logical_range_filter]" + if use + message << "[range-index] " + else + message << "[select] " + end + message << "<#{@shard.table_name}>: " + message << reason + Context.instance.logger.log(Logger::Level::DEBUG, + __FILE__, + line, + method.to_s, + message) + + use + end + + def use_range_index?(range_index, expression_builder) + use_range_index_parameter_message = + "force by use_range_index parameter" + case @context.use_range_index + when true + return decide_use_range_index(true, + use_range_index_parameter_message, + __LINE__, __method__) + when false + return decide_use_range_index(false, + use_range_index_parameter_message, + __LINE__, __method__) + end + + range_index_logical_parameter_message = + "force by range_index logical parameter" + case Parameters.range_index + when :always + return decide_use_range_index(true, + range_index_logical_parameter_message, + __LINE__, __method__) + when :never + return decide_use_range_index(false, + range_index_logical_parameter_message, + __LINE__, __method__) + end + current_limit = @context.current_limit if current_limit < 0 - return false + reason = "limit is negative: <#{current_limit}>" + return decide_use_range_index(false, reason, + __LINE__, __method__) end required_n_records = @context.current_offset + current_limit - max_n_records = @table.size + max_n_records = @shard.table.size if max_n_records <= required_n_records - return false + reason = "the number of required records (#{required_n_records}) " + 
reason << ">= " + reason << "the number of records in shard (#{max_n_records})" + return decide_use_range_index(false, reason, + __LINE__, __method__) end threshold = @context.threshold if threshold <= 0.0 - return true + reason = "threshold is negative: <#{threshold}>" + return decide_use_range_index(true, reason, + __LINE__, __method__) end if threshold >= 1.0 - return false + reason = "threshold (#{threshold}) >= 1.0" + return decide_use_range_index(false, reason, + __LINE__, __method__) end + table = @shard.table estimated_n_records = 0 case @cover_type when :all if @filter - create_expression(@table) do |expression| - @expression_builder.build_all(expression) - estimated_n_records = expression.estimate_size(@table) + create_expression(table) do |expression| + expression_builder.build_all(expression) + unless range_index_available_expression?(expression, + __LINE__, __method__) + return false + end + estimated_n_records = expression.estimate_size(table) end else estimated_n_records = max_n_records end when :partial_min - create_expression(@table) do |expression| - @expression_builder.build_partial_min(expression) - estimated_n_records = expression.estimate_size(@table) + create_expression(table) do |expression| + expression_builder.build_partial_min(expression) + unless range_index_available_expression?(expression, + __LINE__, __method__) + return false + end + estimated_n_records = expression.estimate_size(table) end when :partial_max - create_expression(@table) do |expression| - @expression_builder.build_partial_max(expression) - estimated_n_records = expression.estimate_size(@table) + create_expression(table) do |expression| + expression_builder.build_partial_max(expression) + unless range_index_available_expression?(expression, + __LINE__, __method__) + return false + end + estimated_n_records = expression.estimate_size(table) end when :partial_min_and_max - create_expression(@table) do |expression| - 
@expression_builder.build_partial_min_and_max(expression) - estimated_n_records = expression.estimate_size(@table) + create_expression(table) do |expression| + expression_builder.build_partial_min_and_max(expression) + unless range_index_available_expression?(expression, + __LINE__, __method__) + return false + end + estimated_n_records = expression.estimate_size(table) end end if estimated_n_records <= required_n_records - return false + reason = "the number of required records (#{required_n_records}) " + reason << ">= " + reason << "the number of estimated records (#{estimated_n_records})" + return decide_use_range_index(false, reason, + __LINE__, __method__) end hit_ratio = estimated_n_records / max_n_records.to_f - hit_ratio >= threshold + use_range_index_by_hit_ratio = (hit_ratio >= threshold) + if use_range_index_by_hit_ratio + relation = ">=" + else + relation = "<" + end + reason = "hit ratio " + reason << "(#{hit_ratio}=#{estimated_n_records}/#{max_n_records}) " + reason << "#{relation} threshold (#{threshold})" + decide_use_range_index(use_range_index_by_hit_ratio, reason, + __LINE__, __method__) + end + + def range_index_available_expression?(expression, line, method_name) + nested_reference_vector_column_accessor = + find_nested_reference_vector_column_accessor(expression) + if nested_reference_vector_column_accessor + reason = "nested reference vector column accessor can't be used: " + reason << "<#{nested_reference_vector_column_accessor.name}>" + return decide_use_range_index(false, reason, line, method_name) + end + + selector_only_procedure = find_selector_only_procedure(expression) + if selector_only_procedure + reason = "selector only procedure can't be used: " + reason << "<#{selector_only_procedure.name}>" + return decide_use_range_index(false, reason, line, method_name) + end + + true + end + + def find_nested_reference_vector_column_accessor(expression) + expression.codes.each do |code| + value = code.value + next unless value.is_a?(Accessor) 
+ + sub_accessor = value + while sub_accessor.have_next? + object = sub_accessor.object + return value if object.is_a?(Column) and object.vector? + sub_accessor = sub_accessor.next + end + end + nil + end + + def find_selector_only_procedure(expression) + expression.codes.each do |code| + value = code.value + return value if value.is_a?(Procedure) and value.selector_only? + end + nil + end + + def execute_filter(range_index, expression_builder) + case @cover_type + when :all + filter_shard_all(range_index, expression_builder) + when :partial_min + if range_index + filter_by_range(range_index, expression_builder, + @target_range.min, @target_range.min_border, + nil, nil) + else + filter_table do |expression| + expression_builder.build_partial_min(expression) + end + end + when :partial_max + if range_index + filter_by_range(range_index, expression_builder, + nil, nil, + @target_range.max, @target_range.max_border) + else + filter_table do |expression| + expression_builder.build_partial_max(expression) + end + end + when :partial_min_and_max + if range_index + filter_by_range(range_index, expression_builder, + @target_range.min, @target_range.min_border, + @target_range.max, @target_range.max_border) + else + filter_table do |expression| + expression_builder.build_partial_min_and_max(expression) + end + end + end end - def filter_shard_all(range_index) + def filter_shard_all(range_index, expression_builder) + table = @shard.table if @filter.nil? 
- if @table.size <= @context.current_offset - @context.current_offset -= @table.size + if table.size <= @context.current_offset + @context.current_offset -= table.size return end if range_index - filter_by_range(range_index, + filter_by_range(range_index, expression_builder, nil, nil, nil, nil) else - sort_result_set(@table) + sort_result_set(table) end else if range_index - filter_by_range(range_index, + filter_by_range(range_index, expression_builder, nil, nil, nil, nil) else filter_table do |expression| - @expression_builder.build_all(expression) + expression_builder.build_all(expression) end end end @@ -313,7 +484,7 @@ module Groonga end end - def filter_by_range(range_index, + def filter_by_range(range_index, expression_builder, min, min_border, max, max_border) lexicon = range_index.domain data_table = range_index.range @@ -336,6 +507,10 @@ module Groonga else options[:limit] = current_limit end + max_n_unmatched_records = + compute_max_n_unmatched_records(data_table.size, + options[:limit]) + options[:max_n_unmatched_records] = max_n_unmatched_records if @filter create_expression(data_table) do |expression| expression.parse(@filter) @@ -349,6 +524,17 @@ module Groonga n_matched_records = index_cursor.select(result_set, options) end end + if n_matched_records == -1 + result_set.close + fallback_message = + "fallback because there are too much unmatched records: " + fallback_message << "<#{max_n_unmatched_records}>" + decide_use_range_index(false, + fallback_message, + __LINE__, __method__) + execute_filter(nil, expression_builder) + return + end end rescue result_set.close @@ -393,10 +579,24 @@ module Groonga flags end + def compute_max_n_unmatched_records(data_table_size, limit) + max_n_unmatched_records = limit * 100 + max_n_sample_records = data_table_size + if max_n_sample_records > 10000 + sample_ratio = 1 / (Math.log(data_table_size) ** 2) + max_n_sample_records = (max_n_sample_records * sample_ratio).ceil + end + if max_n_unmatched_records > 
max_n_sample_records + max_n_unmatched_records = max_n_sample_records + end + max_n_unmatched_records + end + def filter_table - create_expression(@table) do |expression| + table = @shard.table + create_expression(table) do |expression| yield(expression) - result_set = @table.select(expression) + result_set = table.select(expression) sort_result_set(result_set) end end diff --git a/storage/mroonga/vendor/groonga/plugins/sharding/logical_select.rb b/storage/mroonga/vendor/groonga/plugins/sharding/logical_select.rb index da6dbe5ae91..07ebf9e8e0d 100644 --- a/storage/mroonga/vendor/groonga/plugins/sharding/logical_select.rb +++ b/storage/mroonga/vendor/groonga/plugins/sharding/logical_select.rb @@ -10,32 +10,50 @@ module Groonga "max", "max_border", "filter", + # Deprecated since 6.1.5. Use sort_keys instead. "sortby", "output_columns", "offset", "limit", "drilldown", + # Deprecated since 6.1.5. Use drilldown_sort_keys instead. "drilldown_sortby", "drilldown_output_columns", "drilldown_offset", "drilldown_limit", + "drilldown_calc_types", + "drilldown_calc_target", + "sort_keys", + "drilldown_sort_keys", + "match_columns", + "query", + "drilldown_filter", ]) def run_body(input) - enumerator = LogicalEnumerator.new("logical_select", input) - context = ExecuteContext.new(input) begin executor = Executor.new(context) executor.execute n_results = 1 - drilldowns = context.drilldown.result_sets - n_results += drilldowns.size + n_plain_drilldowns = context.plain_drilldown.n_result_sets + n_labeled_drilldowns = context.labeled_drilldowns.n_result_sets + if n_plain_drilldowns > 0 + n_results += n_plain_drilldowns + elsif + if n_labeled_drilldowns > 0 + n_results += 1 + end + end writer.array("RESULT", n_results) do write_records(writer, context) - write_drilldowns(writer, context, drilldowns) + if n_plain_drilldowns > 0 + write_plain_drilldowns(writer, context) + elsif n_labeled_drilldowns > 0 + write_labeled_drilldowns(writer, context) + end end ensure context.close @@ -43,6 
+61,68 @@ module Groonga end private + def cache_key(input) + sort_keys = input[:sort_keys] || input[:sortby] + drilldown_sort_keys = + input[:drilldown_sort_keys] || input[:drilldown_sortby] + key = "logical_select\0" + key << "#{input[:logical_table]}\0" + key << "#{input[:shard_key]}\0" + key << "#{input[:min]}\0" + key << "#{input[:min_border]}\0" + key << "#{input[:max]}\0" + key << "#{input[:max_border]}\0" + key << "#{input[:filter]}\0" + key << "#{sort_keys}\0" + key << "#{input[:output_columns]}\0" + key << "#{input[:offset]}\0" + key << "#{input[:limit]}\0" + key << "#{input[:drilldown]}\0" + key << "#{drilldown_sort_keys}\0" + key << "#{input[:match_columns]}\0" + key << "#{input[:query]}\0" + key << "#{input[:drilldown_output_columns]}\0" + key << "#{input[:drilldown_offset]}\0" + key << "#{input[:drilldown_limit]}\0" + key << "#{input[:drilldown_calc_types]}\0" + key << "#{input[:drilldown_calc_target]}\0" + key << "#{input[:drilldown_filter]}\0" + labeled_drilldowns = LabeledDrilldowns.parse(input).sort_by(&:label) + labeled_drilldowns.each do |drilldown| + key << "#{drilldown.label}\0" + key << "#{drilldown.keys.join(',')}\0" + key << "#{drilldown.output_columns}\0" + key << "#{drilldown.offset}\0" + key << "#{drilldown.limit}\0" + key << "#{drilldown.calc_types}\0" + key << "#{drilldown.calc_target_name}\0" + key << "#{drilldown.filter}\0" + cache_key_dynamic_columns(key, drilldown.dynamic_columns) + end + dynamic_columns = DynamicColumns.parse(input) + cache_key_dynamic_columns(key, dynamic_columns) + key + end + + def cache_key_dynamic_columns(key, dynamic_columns) + [ + :initial, + :filtered, + :output + ].each do |stage| + target_dynamic_columns = dynamic_columns.__send__("each_#{stage}").to_a + target_dynamic_columns.sort_by(&:label).each do |dynamic_column| + key << "#{dynamic_column.label}\0" + key << "#{dynamic_column.stage}\0" + key << "#{dynamic_column.type}\0" + key << "#{dynamic_column.flags}\0" + key << "#{dynamic_column.value}\0" + key 
<< "#{dynamic_column.window_sort_keys.join(',')}\0" + key << "#{dynamic_column.window_group_keys.join(',')}\0" + end + end + end + def write_records(writer, context) result_sets = context.result_sets @@ -75,11 +155,11 @@ module Groonga result_sets.each do |result_set| if result_set.size > current_offset writer.write_table_records(result_set, output_columns, options) + current_limit -= result_set.size end if current_offset > 0 current_offset = [current_offset - result_set.size, 0].max end - current_limit -= result_set.size break if current_limit <= 0 options[:offset] = current_offset options[:limit] = current_limit @@ -87,12 +167,14 @@ module Groonga end end - def write_drilldowns(writer, context, drilldowns) - output_columns = context.drilldown.output_columns + def write_plain_drilldowns(writer, execute_context) + plain_drilldown = execute_context.plain_drilldown + drilldowns = plain_drilldown.result_sets + output_columns = plain_drilldown.output_columns options = { - :offset => context.drilldown.output_offset, - :limit => context.drilldown.limit, + :offset => plain_drilldown.offset, + :limit => plain_drilldown.limit, } drilldowns.each do |drilldown| @@ -109,6 +191,60 @@ module Groonga end end + def write_labeled_drilldowns(writer, execute_context) + labeled_drilldowns = execute_context.labeled_drilldowns + is_command_version1 = (context.command_version == 1) + + writer.map("DRILLDOWNS", labeled_drilldowns.n_result_sets) do + labeled_drilldowns.each do |drilldown| + writer.write(drilldown.label) + + result_set = drilldown.result_set + n_elements = 2 # for N hits and columns + n_elements += result_set.size + output_columns = drilldown.output_columns + options = { + :offset => drilldown.offset, + :limit => drilldown.limit, + } + + writer.array("RESULTSET", n_elements) do + writer.array("NHITS", 1) do + writer.write(result_set.size) + end + writer.write_table_columns(result_set, output_columns) + if is_command_version1 and drilldown.need_command_version2? 
+ context.with_command_version(2) do + writer.write_table_records(result_set, + drilldown.output_columns_v2, + options) + end + else + writer.write_table_records(result_set, output_columns, options) + end + end + end + end + end + + class LabeledArgumentParser + def initialize(parameters) + @parameters = parameters + end + + def parse(prefix_pattern) + pattern = /\A#{prefix_pattern}\[(.+?)\]\.(.+)\z/ + labeled_arguments = {} + @parameters.each do |key, value| + match_data = pattern.match(key) + next if match_data.nil? + labeled_argument = (labeled_arguments[match_data[1]] ||= {}) + labeled_argument[match_data[2]] = value + end + labeled_arguments + end + end + module KeysParsable private def parse_keys(raw_keys) @@ -118,68 +254,273 @@ module Groonga end end + module Calculatable + def calc_target(table) + return nil if @calc_target_name.nil? + table.find_column(@calc_target_name) + end + + private + def parse_calc_types(raw_types) + return TableGroupFlags::CALC_COUNT if raw_types.nil? + + types = 0 + raw_types.strip.split(/ *, */).each do |name| + case name + when "COUNT" + types |= TableGroupFlags::CALC_COUNT + when "MAX" + types |= TableGroupFlags::CALC_MAX + when "MIN" + types |= TableGroupFlags::CALC_MIN + when "SUM" + types |= TableGroupFlags::CALC_SUM + when "AVG" + types |= TableGroupFlags::CALC_AVG + when "NONE" + # Do nothing + else + raise InvalidArgument, "invalid drilldown calc type: <#{name}>" + end + end + types + end + end + class ExecuteContext include KeysParsable attr_reader :enumerator + attr_reader :match_columns + attr_reader :query attr_reader :filter attr_reader :offset attr_reader :limit attr_reader :sort_keys attr_reader :output_columns + attr_reader :dynamic_columns attr_reader :result_sets - attr_reader :drilldown + attr_reader :unsorted_result_sets + attr_reader :plain_drilldown + attr_reader :labeled_drilldowns + attr_reader :temporary_tables + attr_reader :expressions def initialize(input) @input = input @enumerator = 
LogicalEnumerator.new("logical_select", @input) + @match_columns = @input[:match_columns] + @query = @input[:query] @filter = @input[:filter] @offset = (@input[:offset] || 0).to_i @limit = (@input[:limit] || 10).to_i - @sort_keys = parse_keys(@input[:sortby]) - @output_columns = @input[:output_columns] || "_key, *" + @sort_keys = parse_keys(@input[:sort_keys] || @input[:sortby]) + @output_columns = @input[:output_columns] || "_id, _key, *" + + @dynamic_columns = DynamicColumns.parse(@input) @result_sets = [] + @unsorted_result_sets = [] + + @plain_drilldown = PlainDrilldownExecuteContext.new(@input) + @labeled_drilldowns = LabeledDrilldowns.parse(@input) + + @temporary_tables = [] - @drilldown = DrilldownExecuteContext.new(@input) + @expressions = [] end def close @result_sets.each do |result_set| result_set.close if result_set.temporary? end + @unsorted_result_sets.each do |result_set| + result_set.close if result_set.temporary? + end + + @plain_drilldown.close + @labeled_drilldowns.close + + @dynamic_columns.close + + @temporary_tables.each do |table| + table.close + end - @drilldown.close + @expressions.each do |expression| + expression.close + end end end - class DrilldownExecuteContext + class DynamicColumns + class << self + def parse(input) + parser = LabeledArgumentParser.new(input) + columns = parser.parse(/columns?/) + + initial_contexts = [] + filtered_contexts = [] + output_contexts = [] + columns.each do |label, parameters| + contexts = nil + case parameters["stage"] + when "initial" + contexts = initial_contexts + when "filtered" + contexts = filtered_contexts + when "output" + contexts = output_contexts + else + next + end + contexts << DynamicColumnExecuteContext.new(label, parameters) + end + + new(initial_contexts, + filtered_contexts, + output_contexts) + end + end + + def initialize(initial_contexts, + filtered_contexts, + output_contexts) + @initial_contexts = initial_contexts + @filtered_contexts = filtered_contexts + @output_contexts = 
output_contexts + end + + def each_initial(&block) + @initial_contexts.each(&block) + end + + def each_filtered(&block) + @filtered_contexts.each(&block) + end + + def each_output(&block) + @output_contexts.each(&block) + end + + def close + @initial_contexts.each do |context| + context.close + end + @filtered_contexts.each do |context| + context.close + end + @output_contexts.each do |context| + context.close + end + end + end + + class DynamicColumnExecuteContext include KeysParsable + attr_reader :label + attr_reader :stage + attr_reader :type + attr_reader :flags + attr_reader :value + attr_reader :window_sort_keys + attr_reader :window_group_keys + def initialize(label, parameters) + @label = label + @stage = parameters["stage"] + @type = parse_type(parameters["type"]) + @flags = parse_flags(parameters["flags"] || "COLUMN_SCALAR") + @value = parameters["value"] + @window_sort_keys = parse_keys(parameters["window.sort_keys"]) + @window_group_keys = parse_keys(parameters["window.group_keys"]) + end + + def close + end + + def apply(table, condition=nil) + column = table.create_column(@label, @flags, @type) + return if table.empty? + + expression = Expression.create(table) + begin + expression.parse(@value) + if @window_sort_keys.empty? and @window_group_keys.empty? + expression.condition = condition if condition + table.apply_expression(column, expression) + else + table.apply_window_function(column, expression, + :sort_keys => @window_sort_keys, + :group_keys => @window_group_keys) + end + ensure + expression.close + end + end + + private + def parse_type(type_raw) + return nil if type_raw.nil? + + type = Context.instance[type_raw] + if type.nil? 
+ message = "#{error_message_tag} unknown type: <#{type_raw}>" + raise InvalidArgument, message + end + + case type + when Type, Table + type + else + message = "#{error_message_tag} invalid type: #{type.grn_inspect}" + raise InvalidArgument, message + end + end + + def parse_flags(flags_raw) + Column.parse_flags(error_message_tag, flags_raw) + end + + def error_message_tag + "[logical_select][columns][#{@stage}][#{@label}]" + end + end + + class PlainDrilldownExecuteContext + include KeysParsable + include Calculatable + attr_reader :keys attr_reader :offset attr_reader :limit attr_reader :sort_keys attr_reader :output_columns - attr_reader :output_offset + attr_reader :calc_target_name + attr_reader :calc_types + attr_reader :filter attr_reader :result_sets attr_reader :unsorted_result_sets + attr_reader :temporary_tables + attr_reader :expressions def initialize(input) @input = input @keys = parse_keys(@input[:drilldown]) @offset = (@input[:drilldown_offset] || 0).to_i @limit = (@input[:drilldown_limit] || 10).to_i - @sort_keys = parse_keys(@input[:drilldown_sortby]) + @sort_keys = parse_keys(@input[:drilldown_sort_keys] || + @input[:drilldown_sortby]) @output_columns = @input[:drilldown_output_columns] @output_columns ||= "_key, _nsubrecs" - - if @sort_keys.empty? - @output_offset = @offset - else - @output_offset = 0 - end + @calc_target_name = @input[:drilldown_calc_target] + @calc_types = parse_calc_types(@input[:drilldown_calc_types]) + @filter = @input[:drilldown_filter] @result_sets = [] @unsorted_result_sets = [] + + @temporary_tables = [] + + @expressions = [] end def close @@ -189,6 +530,170 @@ module Groonga @unsorted_result_sets.each do |result_set| result_set.close end + + @temporary_tables.each do |table| + table.close + end + + @expressions.each do |expression| + expression.close + end + end + + def have_keys? 
+ @keys.size > 0 + end + + def n_result_sets + @result_sets.size + end + end + + class LabeledDrilldowns + include Enumerable + include TSort + + class << self + def parse(input) + parser = LabeledArgumentParser.new(input) + drilldowns = parser.parse(/drilldowns?/) + + contexts = {} + drilldowns.each do |label, parameters| + next if parameters["keys"].nil? + context = LabeledDrilldownExecuteContext.new(label, parameters) + contexts[label] = context + end + + new(contexts) + end + end + + def initialize(contexts) + @contexts = contexts + @dependencies = {} + @contexts.each do |label, context| + if context.table + depended_context = @contexts[context.table] + if depended_context.nil? + raise "Unknown drilldown: <#{context.table}>" + end + @dependencies[label] = [depended_context] + else + @dependencies[label] = [] + end + end + end + + def close + @contexts.each_value do |context| + context.close + end + end + + def [](label) + @contexts[label] + end + + def have_keys? + not @contexts.empty? 
+ end + + def n_result_sets + @contexts.size + end + + def each(&block) + @contexts.each_value(&block) + end + + def tsort_each_node(&block) + @contexts.each_value(&block) + end + + def tsort_each_child(context, &block) + @dependencies[context.label].each(&block) + end + end + + class LabeledDrilldownExecuteContext + include KeysParsable + include Calculatable + + attr_reader :label + attr_reader :keys + attr_reader :offset + attr_reader :limit + attr_reader :sort_keys + attr_reader :output_columns + attr_reader :calc_target_name + attr_reader :calc_types + attr_reader :filter + attr_reader :table + attr_reader :dynamic_columns + attr_accessor :result_set + attr_accessor :unsorted_result_set + attr_reader :temporary_tables + attr_reader :expressions + def initialize(label, parameters) + @label = label + @keys = parse_keys(parameters["keys"]) + @offset = (parameters["offset"] || 0).to_i + @limit = (parameters["limit"] || 10).to_i + @sort_keys = parse_keys(parameters["sort_keys"] || + parameters["sortby"]) + @output_columns = parameters["output_columns"] + @output_columns ||= "_key, _nsubrecs" + @calc_target_name = parameters["calc_target"] + @calc_types = parse_calc_types(parameters["calc_types"]) + @filter = parameters["filter"] + @table = parameters["table"] + + @dynamic_columns = DynamicColumns.parse(parameters) + + @result_set = nil + @unsorted_result_set = nil + + @temporary_tables = [] + + @expressions = [] + end + + def close + @result_set.close if @result_set + @unsorted_result_set.close if @unsorted_result_set + + @dynamic_columns.close + + @temporary_tables.each do |table| + table.close + end + + @expressions.each do |expression| + expression.close + end + end + + def need_command_version2? + /[.\[]/ === @output_columns + end + + def output_columns_v2 + columns = @output_columns.strip.split(/ *, */) + converted_columns = columns.collect do |column| + match_data = /\A_value\.(.+)\z/.match(column) + if match_data.nil? 
+ column + else + nth_key = keys.index(match_data[1]) + if nth_key + "_key[#{nth_key}]" + else + column + end + end + end + converted_columns.join(",") end end @@ -199,22 +704,23 @@ module Groonga def execute execute_search - execute_drilldown + if @context.plain_drilldown.have_keys? + execute_plain_drilldown + elsif @context.labeled_drilldowns.have_keys? + execute_labeled_drilldowns + end end private def execute_search - first_table = nil + first_shard = nil enumerator = @context.enumerator - enumerator.each do |table, shard_key, shard_range| - first_table ||= table - next if table.empty? - - shard_executor = ShardExecutor.new(@context, - table, shard_key, shard_range) + enumerator.each do |shard, shard_range| + first_shard ||= shard + shard_executor = ShardExecutor.new(@context, shard, shard_range) shard_executor.execute end - if first_table.nil? + if first_shard.nil? message = "[logical_select] no shard exists: " + "logical_table: <#{enumerator.logical_table}>: " + @@ -223,33 +729,38 @@ module Groonga end if @context.result_sets.empty? 
result_set = HashTable.create(:flags => ObjectFlags::WITH_SUBREC, - :key_type => first_table) + :key_type => first_shard.table) + @context.dynamic_columns.each_initial do |dynamic_column| + dynamic_column.apply(result_set) + end + @context.dynamic_columns.each_filtered do |dynamic_column| + dynamic_column.apply(result_set) + end @context.result_sets << result_set end end - def execute_drilldown - drilldown = @context.drilldown + def execute_plain_drilldown + drilldown = @context.plain_drilldown group_result = TableGroupResult.new - sort_options = { - :offset => drilldown.offset, - :limit => drilldown.limit, - } begin group_result.key_begin = 0 group_result.key_end = 0 group_result.limit = 1 - group_result.flags = TableGroupFlags::CALC_COUNT + group_result.flags = drilldown.calc_types drilldown.keys.each do |key| @context.result_sets.each do |result_set| - result_set.group([key], group_result) + with_calc_target(group_result, + drilldown.calc_target(result_set)) do + result_set.group([key], group_result) + end end result_set = group_result.table + result_set = apply_drilldown_filter(drilldown, result_set) if drilldown.sort_keys.empty? 
drilldown.result_sets << result_set else - drilldown.result_sets << result_set.sort(drilldown.sort_keys, - sort_options) + drilldown.result_sets << result_set.sort(drilldown.sort_keys) drilldown.unsorted_result_sets << result_set end group_result.table = nil @@ -258,72 +769,204 @@ module Groonga group_result.close end end + + def execute_labeled_drilldowns + drilldowns = @context.labeled_drilldowns + + drilldowns.tsort_each do |drilldown| + group_result = TableGroupResult.new + keys = drilldown.keys + begin + group_result.key_begin = 0 + group_result.key_end = keys.size - 1 + if keys.size > 1 + group_result.max_n_sub_records = 1 + end + group_result.limit = 1 + group_result.flags = drilldown.calc_types + if drilldown.table + target_table = drilldowns[drilldown.table].result_set + with_calc_target(group_result, + drilldown.calc_target(target_table)) do + target_table.group(keys, group_result) + end + else + @context.result_sets.each do |result_set| + with_calc_target(group_result, + drilldown.calc_target(result_set)) do + result_set.group(keys, group_result) + end + end + end + result_set = group_result.table + drilldown.dynamic_columns.each_initial do |dynamic_column| + dynamic_column.apply(result_set) + end + result_set = apply_drilldown_filter(drilldown, result_set) + if drilldown.sort_keys.empty? + drilldown.result_set = result_set + else + drilldown.result_set = result_set.sort(drilldown.sort_keys) + drilldown.unsorted_result_set = result_set + end + group_result.table = nil + ensure + group_result.close + end + end + end + + def with_calc_target(group_result, calc_target) + group_result.calc_target = calc_target + begin + yield + ensure + calc_target.close if calc_target + group_result.calc_target = nil + end + end + + def apply_drilldown_filter(drilldown, result_set) + filter = drilldown.filter + return result_set if filter.nil? 
+ + expression = Expression.create(result_set) + drilldown.expressions << expression + expression.parse(filter) + filtered_result_set = result_set.select(expression) + drilldown.temporary_tables << result_set + filtered_result_set + end end class ShardExecutor - def initialize(context, table, shard_key, shard_range) + def initialize(context, shard, shard_range) @context = context - @table = table - @shard_key = shard_key + @shard = shard @shard_range = shard_range + @target_table = @shard.table + + @match_columns = @context.match_columns + @query = @context.query @filter = @context.filter + @sort_keys = @context.sort_keys @result_sets = @context.result_sets + @unsorted_result_sets = @context.unsorted_result_sets @target_range = @context.enumerator.target_range @cover_type = @target_range.cover_type(@shard_range) - - @expression_builder = RangeExpressionBuilder.new(@shard_key, - @target_range, - @filter) end def execute return if @cover_type == :none + return if @target_table.empty? - case @cover_type - when :all - filter_shard_all - when :partial_min - filter_table do |expression| - @expression_builder.build_partial_min(expression) - end - when :partial_max - filter_table do |expression| - @expression_builder.build_partial_max(expression) + shard_key = @shard.key + if shard_key.nil? 
+ message = "[logical_select] shard_key doesn't exist: " + + "<#{@shard.key_name}>" + raise InvalidArgument, message + end + + @context.dynamic_columns.each_initial do |dynamic_column| + if @target_table == @shard.table + @target_table = create_all_match_table(@target_table) + @context.temporary_tables << @target_table end - when :partial_min_and_max - filter_table do |expression| - @expression_builder.build_partial_min_and_max(expression) + dynamic_column.apply(@target_table) + end + + create_expression_builder(shard_key) do |expression_builder| + case @cover_type + when :all + filter_shard_all(expression_builder) + when :partial_min + filter_table do |expression| + expression_builder.build_partial_min(expression) + end + when :partial_max + filter_table do |expression| + expression_builder.build_partial_max(expression) + end + when :partial_min_and_max + filter_table do |expression| + expression_builder.build_partial_min_and_max(expression) + end end end end private - def filter_shard_all - if @filter.nil? - @result_sets << @table + def filter_shard_all(expression_builder) + if @query.nil? and @filter.nil? 
+ add_result_set(@target_table, nil) + @context.temporary_tables.delete(@target_table) else filter_table do |expression| - @expression_builder.build_all(expression) + expression_builder.build_all(expression) end end end def create_expression(table) expression = Expression.create(table) + @context.expressions << expression + expression + end + + def create_expression_builder(shard_key) + expression_builder = RangeExpressionBuilder.new(shard_key, + @target_range) + expression_builder.match_columns = @match_columns + expression_builder.query = @query + expression_builder.filter = @filter begin - yield(expression) + yield(expression_builder) ensure - expression.close + expression = expression_builder.match_columns_expression + @context.expressions << expression if expression end end def filter_table - create_expression(@table) do |expression| - yield(expression) - @result_sets << @table.select(expression) + table = @target_table + expression = create_expression(table) + yield(expression) + add_result_set(table.select(expression), expression) + end + + def add_result_set(result_set, condition) + if result_set.empty? + result_set.close + return + end + + @context.dynamic_columns.each_filtered do |dynamic_column| + if result_set == @shard.table + @context.temporary_tables << result_set + result_set = create_all_match_table(result_set) + end + dynamic_column.apply(result_set, condition) + end + + if @sort_keys.empty? 
+ @result_sets << result_set + else + @unsorted_result_sets << result_set + sorted_result_set = result_set.sort(@sort_keys) + @result_sets << sorted_result_set + end + end + + def create_all_match_table(table) + expression = Expression.create(table) + begin + expression.append_constant(true, Operator::PUSH, 1) + table.select(expression) + ensure + expression.close end end end diff --git a/storage/mroonga/vendor/groonga/plugins/sharding/logical_shard_list.rb b/storage/mroonga/vendor/groonga/plugins/sharding/logical_shard_list.rb new file mode 100644 index 00000000000..b8ef3f76520 --- /dev/null +++ b/storage/mroonga/vendor/groonga/plugins/sharding/logical_shard_list.rb @@ -0,0 +1,28 @@ +module Groonga + module Sharding + class LogicalShardListCommand < Command + register("logical_shard_list", + [ + "logical_table", + ]) + + def run_body(input) + enumerator = LogicalEnumerator.new("logical_shard_list", + input, + :require_shard_key => false) + shard_names = enumerator.collect do |current_shard, shard_range| + current_shard.table_name + end + + writer.array("shards", shard_names.size) do + shard_names.each do |shard_name| + writer.map("shard", 1) do + writer.write("name") + writer.write(shard_name) + end + end + end + end + end + end +end diff --git a/storage/mroonga/vendor/groonga/plugins/sharding/logical_table_remove.rb b/storage/mroonga/vendor/groonga/plugins/sharding/logical_table_remove.rb index f5c31c72bb1..3353d6c3a83 100644 --- a/storage/mroonga/vendor/groonga/plugins/sharding/logical_table_remove.rb +++ b/storage/mroonga/vendor/groonga/plugins/sharding/logical_table_remove.rb @@ -9,48 +9,323 @@ module Groonga "min_border", "max", "max_border", + "dependent", + "force", ]) def run_body(input) + @dependent = (input[:dependent] == "yes") + @force = (input[:force] == "yes") + enumerator = LogicalEnumerator.new("logical_table_remove", input) - succeess = true - enumerator.each do |table, shard_key, shard_range| - remove_table(table, - shard_key, - shard_range, - 
enumerator.target_range) + success = true + enumerator.each do |shard, shard_range| + remove_shard(shard, shard_range, enumerator.target_range) end - writer.write(succeess) + writer.write(success) end private - def remove_table(table, shard_key, shard_range, target_range) + def remove_shard(shard, shard_range, target_range) cover_type = target_range.cover_type(shard_range) return if cover_type == :none - expression_builder = RangeExpressionBuilder.new(shard_key, - target_range, - nil) + shard_key = shard.key + if shard_key.nil? + if @force + context.clear_error + else + message = + "[logical_table_remove] shard_key doesn't exist: " + + "<#{shard.key_name}>" + raise InvalidArgument, message + end + end + table = shard.table + if cover_type == :all or ((table.nil? or shard_key.nil?) and @force) + remove_table(shard, table) + return + end + + expression_builder = RangeExpressionBuilder.new(shard_key, + target_range) case cover_type - when :all - table.remove when :partial_min remove_records(table) do |expression| expression_builder.build_partial_min(expression) end - table.remove if table.empty? + remove_table(shard, table) if table.empty? when :partial_max remove_records(table) do |expression| expression_builder.build_partial_max(expression) end - table.remove if table.empty? + remove_table(shard, table) if table.empty? when :partial_min_and_max remove_records(table) do |expression| expression_builder.build_partial_min_and_max(expression) end - table.remove if table.empty? + remove_table(shard, table) if table.empty? + end + end + + def collect_referenced_table_ids_from_index_ids(index_ids, + referenced_table_ids) + database = context.database + index_ids.each do |index_id| + index = context[index_id] + if index.nil? 
+ context.clear_error + index_name = database[index_id] + lexicon_name = index_name.split(".", 2)[0] + lexicon_id = database[lexicon_name] + referenced_table_ids << lexicon_id if lexicon_id + else + referenced_table_ids << index.domain_id + end + end + end + + def collect_referenced_table_ids_from_column_name(column_name, + referenced_table_ids) + database = context.database + column_id = database[column_name] + database.each_raw do |id, cursor| + next if ID.builtin?(id) + next if id == column_id + + context.open_temporary(id) do |object| + if object.nil? + context.clear_error + next + end + + case object + when IndexColumn + if object.source_ids.include?(column_id) + collect_referenced_table_ids_from_index_ids([id], + referenced_table_ids) + end + end + end + end + end + + def collect_referenced_table_ids_from_column(column, + referenced_table_ids) + range = column.range + case range + when nil + context.clear_error + when Table + referenced_table_ids << range.id + collect_referenced_table_ids_from_index_ids(range.index_ids, + referenced_table_ids) + end + collect_referenced_table_ids_from_index_ids(column.index_ids, + referenced_table_ids) + end + + def collect_referenced_table_ids_from_column_names(column_names) + referenced_table_ids = [] + column_names.each do |column_name| + column = context[column_name] + if column.nil? + context.clear_error + collect_referenced_table_ids_from_column_name(column_name, + referenced_table_ids) + else + collect_referenced_table_ids_from_column(column, + referenced_table_ids) + end + end + referenced_table_ids + end + + def collect_referenced_table_ids(shard, table) + return [] unless @dependent + + column_names = nil + if table + begin + column_names = table.columns.collect(&:name) + rescue + context.clear_error + end + end + if column_names.nil? + prefix = "#{shard.table_name}." 
+ column_names = [] + context.database.each_name(:prefix => prefix) do |column_name| + column_names << column_name + end + end + + collect_referenced_table_ids_from_column_names(column_names) + end + + def remove_table(shard, table) + if table.nil? + unless @force + if context.rc == Context::RC::SUCCESS.to_i + error_class = InvalidArgument + else + rc = Context::RC.find(context.rc) + error_class = rc.error_class + end + message = "[logical_table_remove] table is broken: " + + "<#{shard.table_name}>: #{context.error_message}" + raise error_class, message + end + context.clear_error + end + + referenced_table_ids = collect_referenced_table_ids(shard, table) + + if table.nil? + remove_table_force(shard.table_name) + else + options = {:dependent => @dependent} + if @force + begin + table.remove(options) + rescue + context.clear_error + table.close + remove_table_force(shard.table_name) + end + else + table.remove(options) + end + end + + remove_referenced_tables(shard, referenced_table_ids) + end + + def remove_table_force(table_name) + database = context.database + + prefix = "#{table_name}." + database.each_raw(:prefix => prefix) do |id, cursor| + column = context[id] + if column.nil? + context.clear_error + column_name = cursor.key + remove_column_force(column_name) + table = context[table_name] + if table.nil? + context.clear_error + else + table.close + end + else + remove_column(column) + end + end + + table_id = database[table_name] + return if table_id.nil? + + database.each_raw do |id, cursor| + next if ID.builtin?(id) + next if id == table_id + + context.open_temporary(id) do |object| + if object.nil? 
+ context.clear_error + next + end + + case object + when Table + if object.domain_id == table_id + begin + object.remove(:dependent => @dependent) + rescue + context.clear_error + reference_table_name = object.name + object.close + remove_table_force(reference_table_name) + end + end + when Column + if object.range_id == table_id + remove_column(object) + end + end + end + end + + Object.remove_force(table_name) + end + + def remove_column(column) + begin + column.remove(:dependent => @dependent) + rescue + context.clear_error + column_name = column.name + column.close + remove_column_force(column_name) + end + end + + def remove_column_force(column_name) + database = context.database + + column_id = database[column_name] + + column = context[column_id] + if column.nil? + context.clear_error + else + column.index_ids.each do |id| + index_column = context[id] + if index_column.nil? + context.clear_error + index_column_name = database[id] + remove_column_force(index_column_name) + else + remove_column(index_column) + end + end + column.close + end + + Object.remove_force(column_name) + end + + def remove_referenced_tables(shard, referenced_table_ids) + return if referenced_table_ids.empty? + + database = context.database + shard_suffix = shard.range_data.to_suffix + referenced_table_ids.uniq.each do |referenced_table_id| + referenced_table_name = database[referenced_table_id] + next if referenced_table_name.nil? + next unless referenced_table_name.end_with?(shard_suffix) + + referenced_table = context[referenced_table_id] + if referenced_table.nil? 
+ context.clear_error + if @force + Object.remove_force(referenced_table_name) + end + next + end + + if @force + begin + referenced_table.remove(:dependent => @dependent) + rescue + context.clear_error + referenced_table.close + remove_table_force(referenced_table_name) + end + else + referenced_table.remove(:dependent => @dependent) + end end end diff --git a/storage/mroonga/vendor/groonga/plugins/sharding/parameters.rb b/storage/mroonga/vendor/groonga/plugins/sharding/parameters.rb new file mode 100644 index 00000000000..b09a9d6c254 --- /dev/null +++ b/storage/mroonga/vendor/groonga/plugins/sharding/parameters.rb @@ -0,0 +1,10 @@ +module Groonga + module Sharding + module Parameters + @range_index = :auto + class << self + attr_accessor :range_index + end + end + end +end diff --git a/storage/mroonga/vendor/groonga/plugins/sharding/range_expression_builder.rb b/storage/mroonga/vendor/groonga/plugins/sharding/range_expression_builder.rb index 2e2dd29c51c..cc80735d4c5 100644 --- a/storage/mroonga/vendor/groonga/plugins/sharding/range_expression_builder.rb +++ b/storage/mroonga/vendor/groonga/plugins/sharding/range_expression_builder.rb @@ -1,16 +1,23 @@ module Groonga module Sharding class RangeExpressionBuilder - def initialize(key, target_range, filter) + attr_reader :match_columns_expression + + attr_writer :match_columns + attr_writer :query + attr_writer :filter + + def initialize(key, target_range) @key = key @target_range = target_range - @filter = filter + @match_columns_expression = nil + @match_columns = nil + @query = nil + @filter = nil end def build_all(expression) - return if @filter.nil? 
- - expression.parse(@filter) + build_condition(expression) end def build_partial_min(expression) @@ -22,10 +29,7 @@ module Groonga else expression.append_operator(Operator::GREATER, 2) end - if @filter - expression.parse(@filter) - expression.append_operator(Operator::AND, 2) - end + build_condition(expression) end def build_partial_max(expression) @@ -37,10 +41,7 @@ module Groonga else expression.append_operator(Operator::LESS, 2) end - if @filter - expression.parse(@filter) - expression.append_operator(Operator::AND, 2) - end + build_condition(expression) end def build_partial_min_and_max(expression) @@ -55,9 +56,31 @@ module Groonga expression.append_constant(@target_range.max_border, Operator::PUSH, 1) expression.append_operator(Operator::CALL, 5) + build_condition(expression) + end + + private + def build_condition(expression) + if @query + is_empty = expression.empty? + if @match_columns + table = Context.instance[expression[0].domain] + @match_columns_expression = Expression.create(table) + @match_columns_expression.parse(@match_columns) + end + flags = Expression::SYNTAX_QUERY | + Expression::ALLOW_PRAGMA | + Expression::ALLOW_COLUMN + expression.parse(@query, + default_column: @match_columns_expression, + flags: flags) + expression.append_operator(Operator::AND, 2) unless is_empty + end + if @filter + is_empty = expression.empty? 
expression.parse(@filter) - expression.append_operator(Operator::AND, 2) + expression.append_operator(Operator::AND, 2) unless is_empty end end end diff --git a/storage/mroonga/vendor/groonga/plugins/sharding/sources.am b/storage/mroonga/vendor/groonga/plugins/sharding/sources.am index 14e56609cae..df2b6d023da 100644 --- a/storage/mroonga/vendor/groonga/plugins/sharding/sources.am +++ b/storage/mroonga/vendor/groonga/plugins/sharding/sources.am @@ -1,7 +1,10 @@ sharding_scripts = \ logical_count.rb \ logical_enumerator.rb \ + logical_parameters.rb \ logical_range_filter.rb \ logical_select.rb \ + logical_shard_list.rb \ logical_table_remove.rb \ + parameters.rb \ range_expression_builder.rb diff --git a/storage/mroonga/vendor/groonga/plugins/suggest/CMakeLists.txt b/storage/mroonga/vendor/groonga/plugins/suggest/CMakeLists.txt index 03375f97adb..e7d5364979b 100644 --- a/storage/mroonga/vendor/groonga/plugins/suggest/CMakeLists.txt +++ b/storage/mroonga/vendor/groonga/plugins/suggest/CMakeLists.txt @@ -15,7 +15,8 @@ include_directories( ${CMAKE_CURRENT_SOURCE_DIR}/../../lib - ${MRUBY_INCLUDE_DIRS}) + ${MRUBY_INCLUDE_DIRS} + ${MESSAGE_PACK_INCLUDE_DIRS}) read_file_list(${CMAKE_CURRENT_SOURCE_DIR}/sources.am SUGGEST_SOURCES) set_source_files_properties(${SUGGEST_SOURCES} diff --git a/storage/mroonga/vendor/groonga/plugins/suggest/suggest.c b/storage/mroonga/vendor/groonga/plugins/suggest/suggest.c index 073f5e00103..4dd5f6e62ca 100644 --- a/storage/mroonga/vendor/groonga/plugins/suggest/suggest.c +++ b/storage/mroonga/vendor/groonga/plugins/suggest/suggest.c @@ -28,13 +28,6 @@ #include "grn_output.h" #include <groonga/plugin.h> -#ifdef HAVE__STRNICMP -# ifdef strncasecmp -# undef strncasecmp -# endif /* strcasecmp */ -# define strncasecmp(s1,s2,n) _strnicmp(s1,s2,n) -#endif /* HAVE__STRNICMP */ - #define VAR GRN_PROC_GET_VAR_BY_OFFSET #define CONST_STR_LEN(x) x, x ? 
sizeof(x) - 1 : 0 #define TEXT_VALUE_LEN(x) GRN_TEXT_VALUE(x), GRN_TEXT_LEN(x) @@ -166,7 +159,7 @@ cooccurrence_search(grn_ctx *ctx, grn_obj *items, grn_obj *items_boost, grn_id i } if ((c = grn_ii_cursor_open(ctx, (grn_ii *)co, id, GRN_ID_NIL, GRN_ID_MAX, ((grn_ii *)co)->n_elements - 1, 0))) { - grn_ii_posting *p; + grn_posting *p; grn_obj post, pair_freq, item_freq, item_freq2, item_boost; GRN_RECORD_INIT(&post, 0, grn_obj_id(ctx, items)); GRN_INT32_INIT(&pair_freq, 0); @@ -330,7 +323,7 @@ complete(grn_ctx *ctx, grn_obj *items, grn_obj *items_boost, grn_obj *col, grn_ii_cursor *icur; if ((icur = grn_ii_cursor_open(ctx, (grn_ii *)index, id, GRN_ID_NIL, GRN_ID_MAX, 1, 0))) { - grn_ii_posting *p; + grn_posting *p; while ((p = grn_ii_cursor_next(ctx, icur))) { complete_add_item(ctx, p->rid, res, frequency_threshold, items_freq, items_boost, @@ -536,10 +529,10 @@ parse_search_mode(grn_ctx *ctx, grn_obj *mode_text) mode_length = GRN_TEXT_LEN(mode_text); if (mode_length == 3 && - strncasecmp("yes", GRN_TEXT_VALUE(mode_text), 3) == 0) { + grn_strncasecmp("yes", GRN_TEXT_VALUE(mode_text), 3) == 0) { mode = GRN_SUGGEST_SEARCH_YES; } else if (mode_length == 2 && - strncasecmp("no", GRN_TEXT_VALUE(mode_text), 2) == 0) { + grn_strncasecmp("no", GRN_TEXT_VALUE(mode_text), 2) == 0) { mode = GRN_SUGGEST_SEARCH_NO; } else { mode = GRN_SUGGEST_SEARCH_AUTO; @@ -923,10 +916,10 @@ learner_learn_for_suggest(grn_ctx *ctx, grn_suggest_learner *learner) pair_id = grn_table_add(ctx, learner->pairs, &key, sizeof(uint64_t), &added); if (added) { - GRN_RECORD_SET(ctx, pre_item, tid); - grn_obj_set_value(ctx, learner->pairs_pre, pair_id, + GRN_RECORD_SET(ctx, pre_item, tid); + grn_obj_set_value(ctx, learner->pairs_pre, pair_id, pre_item, GRN_OBJ_SET); - grn_obj_set_value(ctx, learner->pairs_post, pair_id, + grn_obj_set_value(ctx, learner->pairs_post, pair_id, post_item, GRN_OBJ_SET); } if (!token_ids) { diff --git a/storage/mroonga/vendor/groonga/plugins/table/Makefile.am 
b/storage/mroonga/vendor/groonga/plugins/table/Makefile.am deleted file mode 100644 index 4b49b0a3224..00000000000 --- a/storage/mroonga/vendor/groonga/plugins/table/Makefile.am +++ /dev/null @@ -1,24 +0,0 @@ -EXTRA_DIST = \ - CMakeLists.txt - -AM_CFLAGS = \ - $(MESSAGE_PACK_CFLAGS) \ - $(MRUBY_CFLAGS) - -AM_CPPFLAGS = \ - -I$(top_builddir) \ - -I$(top_srcdir)/include \ - -I$(top_srcdir)/lib - -AM_LDFLAGS = \ - -avoid-version \ - -module \ - -no-undefined - -LIBS = \ - $(top_builddir)/lib/libgroonga.la \ - $(MESSAGE_PACK_LIBS) - -table_plugins_LTLIBRARIES = table.la - -include sources.am diff --git a/storage/mroonga/vendor/groonga/plugins/table/sources.am b/storage/mroonga/vendor/groonga/plugins/table/sources.am deleted file mode 100644 index 943e79b1f5c..00000000000 --- a/storage/mroonga/vendor/groonga/plugins/table/sources.am +++ /dev/null @@ -1,2 +0,0 @@ -table_la_SOURCES = \ - table.c diff --git a/storage/mroonga/vendor/groonga/plugins/table/table.c b/storage/mroonga/vendor/groonga/plugins/table/table.c deleted file mode 100644 index 3b0c09d9a91..00000000000 --- a/storage/mroonga/vendor/groonga/plugins/table/table.c +++ /dev/null @@ -1,747 +0,0 @@ -/* -*- c-basic-offset: 2; indent-tabs-mode: nil -*- */ -/* Copyright(C) 2012 Brazil - - This library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License version 2.1 as published by the Free Software Foundation. - - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. 
- - You should have received a copy of the GNU Lesser General Public - License along with this library; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA -*/ - -#include <string.h> - -#include "grn_ctx.h" -#include "grn_db.h" -#include "grn_output.h" -#include "grn_util.h" -#include <groonga/plugin.h> - -#define VAR GRN_PROC_GET_VAR_BY_OFFSET -#define TEXT_VALUE_LEN(x) GRN_TEXT_VALUE(x), GRN_TEXT_LEN(x) - -static grn_obj * -grn_ctx_get_table_by_name_or_id(grn_ctx *ctx, - const char *name, unsigned int name_len) -{ - grn_obj *table; - const char *end = name + name_len; - const char *rest = NULL; - grn_id id = grn_atoui(name, end, &rest); - if (rest == end) { - table = grn_ctx_at(ctx, id); - } else { - table = grn_ctx_get(ctx, name, name_len); - } - if (!GRN_OBJ_TABLEP(table)) { - ERR(GRN_INVALID_ARGUMENT, "invalid table name: <%.*s>", name_len, name); - if (table) { - grn_obj_unlink(ctx, table); - table = NULL; - } - } - return table; -} - -static void -grn_output_table_name_or_id(grn_ctx *ctx, grn_obj *table) -{ - if (table) { - if (((grn_db_obj *)table)->id & GRN_OBJ_TMP_OBJECT) { - GRN_OUTPUT_INT64(((grn_db_obj *)table)->id); - } else { - int name_len; - char name_buf[GRN_TABLE_MAX_KEY_SIZE]; - name_len = grn_obj_name(ctx, table, name_buf, GRN_TABLE_MAX_KEY_SIZE); - GRN_OUTPUT_STR(name_buf, name_len); - } - } else { - GRN_OUTPUT_INT64(0); - } -} - -static grn_bool -parse_bool_value(grn_ctx *ctx, grn_obj *text) -{ - grn_bool value = GRN_FALSE; - if (GRN_TEXT_LEN(text) == 3 && - memcmp("yes", GRN_TEXT_VALUE(text), 3) == 0) { - value = GRN_TRUE; - } - return value; -} - -static grn_operator -parse_set_operator_value(grn_ctx *ctx, grn_obj *text) -{ - grn_operator value = GRN_OP_OR; - if (GRN_TEXT_LEN(text) == 3) { - if (memcmp("and", GRN_TEXT_VALUE(text), 3) == 0) { - value = GRN_OP_AND; - } else if (memcmp("but", GRN_TEXT_VALUE(text), 3) == 0) { - value = GRN_OP_AND_NOT; - } - } else if 
(GRN_TEXT_LEN(text) == 6 && - memcmp("adjust", GRN_TEXT_VALUE(text), 6) == 0) { - value = GRN_OP_ADJUST; - } else if (GRN_TEXT_LEN(text) == 7 && - memcmp("and_not", GRN_TEXT_VALUE(text), 7) == 0) { - value = GRN_OP_AND_NOT; - } - return value; -} - -static grn_obj * -command_match(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data) -{ - grn_obj *result_set = NULL; - grn_obj *table = grn_ctx_get_table_by_name_or_id(ctx, TEXT_VALUE_LEN(VAR(0))); - if (table) { - grn_expr_flags flags = GRN_EXPR_SYNTAX_QUERY; - grn_obj *v, *query, *columns = NULL; - GRN_EXPR_CREATE_FOR_QUERY(ctx, table, query, v); - if (query) { - if (GRN_TEXT_LEN(VAR(1))) { - GRN_EXPR_CREATE_FOR_QUERY(ctx, table, columns, v); - if (columns) { - grn_expr_parse(ctx, columns, TEXT_VALUE_LEN(VAR(1)), - NULL, GRN_OP_MATCH, GRN_OP_AND, - GRN_EXPR_SYNTAX_SCRIPT); - } - } - if (parse_bool_value(ctx, VAR(5))) { - flags |= GRN_EXPR_ALLOW_COLUMN; - } - if (parse_bool_value(ctx, VAR(6))) { - flags |= GRN_EXPR_ALLOW_PRAGMA; - } - grn_expr_parse(ctx, query, TEXT_VALUE_LEN(VAR(2)), - columns, GRN_OP_MATCH, GRN_OP_AND, flags); - if (GRN_TEXT_LEN(VAR(3))) { - result_set = grn_ctx_get_table_by_name_or_id(ctx, TEXT_VALUE_LEN(VAR(3))); - } else { - result_set = grn_table_create(ctx, NULL, 0, NULL, - GRN_TABLE_HASH_KEY| - GRN_OBJ_WITH_SUBREC, - table, NULL); - } - if (result_set) { - grn_table_select(ctx, table, query, result_set, - parse_set_operator_value(ctx, VAR(4))); - } - grn_obj_unlink(ctx, columns); - grn_obj_unlink(ctx, query); - } - } - grn_output_table_name_or_id(ctx, result_set); - return NULL; -} - -static grn_obj * -command_filter_by_script(grn_ctx *ctx, int nargs, - grn_obj **args, grn_user_data *user_data) -{ - grn_obj *result_set = NULL; - grn_obj *table = grn_ctx_get_table_by_name_or_id(ctx, TEXT_VALUE_LEN(VAR(0))); - if (table) { - grn_expr_flags flags = GRN_EXPR_SYNTAX_SCRIPT; - grn_obj *v, *query; - GRN_EXPR_CREATE_FOR_QUERY(ctx, table, query, v); - if (query) { - if 
(parse_bool_value(ctx, VAR(4))) { - flags |= GRN_EXPR_ALLOW_UPDATE; - } - grn_expr_parse(ctx, query, TEXT_VALUE_LEN(VAR(1)), - NULL, GRN_OP_MATCH, GRN_OP_AND, flags); - if (GRN_TEXT_LEN(VAR(2))) { - result_set = grn_ctx_get_table_by_name_or_id(ctx, TEXT_VALUE_LEN(VAR(2))); - } else { - result_set = grn_table_create(ctx, NULL, 0, NULL, - GRN_TABLE_HASH_KEY| - GRN_OBJ_WITH_SUBREC, - table, NULL); - } - if (result_set) { - grn_table_select(ctx, table, query, result_set, - parse_set_operator_value(ctx, VAR(3))); - } - grn_obj_unlink(ctx, query); - } - } - grn_output_table_name_or_id(ctx, result_set); - return NULL; -} - -static grn_obj * -command_filter(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data) -{ - grn_operator operator = GRN_OP_NOP; - grn_obj *table, *column, *result_set = NULL; - if (!(table = grn_ctx_get_table_by_name_or_id(ctx, TEXT_VALUE_LEN(VAR(0))))) { - goto exit; - } - if (!(column = grn_obj_column(ctx, table, TEXT_VALUE_LEN(VAR(1))))) { - ERR(GRN_INVALID_ARGUMENT, "invalid column name: <%.*s>", - (int)GRN_TEXT_LEN(VAR(1)), GRN_TEXT_VALUE(VAR(1))); - goto exit; - } - if (GRN_TEXT_LEN(VAR(2)) == 0) { - ERR(GRN_INVALID_ARGUMENT, "missing mandatory argument: operator"); - goto exit; - } else { - uint32_t operator_len = GRN_TEXT_LEN(VAR(2)); - const char *operator_text = GRN_TEXT_VALUE(VAR(2)); - switch (operator_text[0]) { - case '<' : - if (operator_len == 1) { - operator = GRN_OP_LESS; - } - break; - } - if (operator == GRN_OP_NOP) { - ERR(GRN_INVALID_ARGUMENT, "invalid operator: <%.*s>", - operator_len, operator_text); - goto exit; - } - } - if (GRN_TEXT_LEN(VAR(4))) { - result_set = grn_ctx_get_table_by_name_or_id(ctx, TEXT_VALUE_LEN(VAR(4))); - } else { - result_set = grn_table_create(ctx, NULL, 0, NULL, - GRN_TABLE_HASH_KEY| - GRN_OBJ_WITH_SUBREC, - table, NULL); - } - if (result_set) { - grn_column_filter(ctx, column, operator, VAR(3), result_set, - parse_set_operator_value(ctx, VAR(5))); - } -exit : - 
grn_output_table_name_or_id(ctx, result_set); - return NULL; -} - -static grn_obj * -command_group(grn_ctx *ctx, int nargs, grn_obj **args, - grn_user_data *user_data) -{ - const char *table = GRN_TEXT_VALUE(VAR(0)); - unsigned int table_len = GRN_TEXT_LEN(VAR(0)); - const char *key = GRN_TEXT_VALUE(VAR(1)); - unsigned int key_len = GRN_TEXT_LEN(VAR(1)); - const char *set = GRN_TEXT_VALUE(VAR(2)); - unsigned int set_len = GRN_TEXT_LEN(VAR(2)); - grn_obj *table_ = grn_ctx_get_table_by_name_or_id(ctx, table, table_len); - grn_obj *set_ = NULL; - if (table_) { - uint32_t ngkeys; - grn_table_sort_key *gkeys; - gkeys = grn_table_sort_key_from_str(ctx, key, key_len, table_, &ngkeys); - if (gkeys) { - if (set_len) { - set_ = grn_ctx_get_table_by_name_or_id(ctx, set, set_len); - } else { - set_ = grn_table_create_for_group(ctx, NULL, 0, NULL, - gkeys[0].key, table_, 0); - } - if (set_) { - if (GRN_TEXT_LEN(VAR(3))) { - uint32_t gap = grn_atoui(GRN_TEXT_VALUE(VAR(3)), - GRN_BULK_CURR(VAR(3)), NULL); - grn_table_group_with_range_gap(ctx, table_, gkeys, set_, gap); - } else { - grn_table_group_result g = { - set_, 0, 0, 1, - GRN_TABLE_GROUP_CALC_COUNT, 0 - }; - grn_table_group(ctx, table_, gkeys, 1, &g, 1); - } - } - grn_table_sort_key_close(ctx, gkeys, ngkeys); - } - } - grn_output_table_name_or_id(ctx, set_); - return NULL; -} - -#define DEFAULT_LIMIT 10 - -static grn_obj * -command_sort(grn_ctx *ctx, int nargs, grn_obj **args, - grn_user_data *user_data) -{ - const char *table = GRN_TEXT_VALUE(VAR(0)); - unsigned int table_len = GRN_TEXT_LEN(VAR(0)); - const char *keys = GRN_TEXT_VALUE(VAR(1)); - unsigned int keys_len = GRN_TEXT_LEN(VAR(1)); - int offset = GRN_TEXT_LEN(VAR(2)) - ? grn_atoi(GRN_TEXT_VALUE(VAR(2)), GRN_BULK_CURR(VAR(2)), NULL) - : 0; - int limit = GRN_TEXT_LEN(VAR(3)) - ? 
grn_atoi(GRN_TEXT_VALUE(VAR(3)), GRN_BULK_CURR(VAR(3)), NULL) - : DEFAULT_LIMIT; - grn_obj *table_ = grn_ctx_get_table_by_name_or_id(ctx, table, table_len); - grn_obj *sorted = NULL; - if (table_) { - uint32_t nkeys; - grn_table_sort_key *keys_; - if (keys_len && - (keys_ = grn_table_sort_key_from_str(ctx, keys, keys_len, - table_, &nkeys))) { - if ((sorted = grn_table_create(ctx, NULL, 0, NULL, - GRN_OBJ_TABLE_NO_KEY, NULL, table_))) { - int table_size = (int)grn_table_size(ctx, table_); - grn_normalize_offset_and_limit(ctx, table_size, &offset, &limit); - grn_table_sort(ctx, table_, offset, limit, sorted, keys_, nkeys); - grn_table_sort_key_close(ctx, keys_, nkeys); - } - } - } - grn_output_table_name_or_id(ctx, sorted); - return NULL; -} - -static grn_obj * -command_output(grn_ctx *ctx, int nargs, grn_obj **args, - grn_user_data *user_data) -{ - const char *table = GRN_TEXT_VALUE(VAR(0)); - unsigned int table_len = GRN_TEXT_LEN(VAR(0)); - const char *columns = GRN_TEXT_VALUE(VAR(1)); - unsigned int columns_len = GRN_TEXT_LEN(VAR(1)); - int offset = GRN_TEXT_LEN(VAR(2)) - ? grn_atoi(GRN_TEXT_VALUE(VAR(2)), GRN_BULK_CURR(VAR(2)), NULL) - : 0; - int limit = GRN_TEXT_LEN(VAR(3)) - ? 
grn_atoi(GRN_TEXT_VALUE(VAR(3)), GRN_BULK_CURR(VAR(3)), NULL) - : DEFAULT_LIMIT; - grn_obj *table_ = grn_ctx_get_table_by_name_or_id(ctx, table, table_len); - if (table_) { - grn_obj_format format; - int table_size = (int)grn_table_size(ctx, table_); - GRN_OBJ_FORMAT_INIT(&format, table_size, 0, limit, offset); - format.flags = - GRN_OBJ_FORMAT_WITH_COLUMN_NAMES| - GRN_OBJ_FORMAT_XML_ELEMENT_RESULTSET; - /* TODO: accept only comma separated expr as columns */ - grn_obj_columns(ctx, table_, columns, columns_len, &format.columns); - GRN_OUTPUT_OBJ(table_, &format); - GRN_OBJ_FORMAT_FIN(ctx, &format); - } - return NULL; -} - -static grn_obj * -command_each(grn_ctx *ctx, int nargs, grn_obj **args, - grn_user_data *user_data) -{ - const char *table = GRN_TEXT_VALUE(VAR(0)); - unsigned int table_len = GRN_TEXT_LEN(VAR(0)); - const char *expr = GRN_TEXT_VALUE(VAR(1)); - unsigned int expr_len = GRN_TEXT_LEN(VAR(1)); - grn_obj *table_ = grn_ctx_get_table_by_name_or_id(ctx, table, table_len); - if (table_) { - grn_obj *v, *expr_; - GRN_EXPR_CREATE_FOR_QUERY(ctx, table_, expr_, v); - if (expr_ && v) { - grn_table_cursor *tc; - grn_expr_parse(ctx, expr_, expr, expr_len, - NULL, GRN_OP_MATCH, GRN_OP_AND, - GRN_EXPR_SYNTAX_SCRIPT|GRN_EXPR_ALLOW_UPDATE); - if ((tc = grn_table_cursor_open(ctx, table_, NULL, 0, - NULL, 0, 0, -1, 0))) { - grn_id id; - while ((id = grn_table_cursor_next(ctx, tc)) != GRN_ID_NIL) { - GRN_RECORD_SET(ctx, v, id); - grn_expr_exec(ctx, expr_, 0); - } - grn_table_cursor_close(ctx, tc); - } - grn_obj_unlink(ctx, expr_); - } - } - GRN_OUTPUT_BOOL(!ctx->rc); - return NULL; -} - -static grn_obj * -command_unlink(grn_ctx *ctx, int nargs, grn_obj **args, - grn_user_data *user_data) -{ - const char *table = GRN_TEXT_VALUE(VAR(0)); - unsigned int table_len = GRN_TEXT_LEN(VAR(0)); - grn_obj *table_ = grn_ctx_get_table_by_name_or_id(ctx, table, table_len); - if (table_) { - grn_obj_unlink(ctx, table_); - } - GRN_OUTPUT_BOOL(!ctx->rc); - return NULL; -} - -static 
grn_obj * -command_add(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data) -{ - grn_load_(ctx, GRN_CONTENT_JSON, - GRN_TEXT_VALUE(VAR(0)), GRN_TEXT_LEN(VAR(0)), - NULL, 0, - GRN_TEXT_VALUE(VAR(1)), GRN_TEXT_LEN(VAR(1)), - NULL, 0, NULL, 0, 0); - GRN_OUTPUT_BOOL(ctx->impl->loader.nrecords); - if (ctx->impl->loader.table) { - grn_db_touch(ctx, DB_OBJ(ctx->impl->loader.table)->db); - } - return NULL; -} - -static grn_obj * -command_set(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data) -{ - int table_name_len = GRN_TEXT_LEN(VAR(0)); - const char *table_name = GRN_TEXT_VALUE(VAR(0)); - grn_obj *table = grn_ctx_get(ctx, table_name, table_name_len); - if (table) { - grn_id id = GRN_ID_NIL; - int key_len = GRN_TEXT_LEN(VAR(2)); - int id_len = GRN_TEXT_LEN(VAR(5)); - if (key_len) { - const char *key = GRN_TEXT_VALUE(VAR(2)); - id = grn_table_get(ctx, table, key, key_len); - } else { - if (id_len) { - id = grn_atoui(GRN_TEXT_VALUE(VAR(5)), GRN_BULK_CURR(VAR(5)), NULL); - } - id = grn_table_at(ctx, table, id); - } - if (id) { - grn_obj obj; - grn_obj_format format; - GRN_RECORD_INIT(&obj, 0, ((grn_db_obj *)table)->id); - GRN_OBJ_FORMAT_INIT(&format, 1, 0, 1, 0); - GRN_RECORD_SET(ctx, &obj, id); - grn_obj_columns(ctx, table, - GRN_TEXT_VALUE(VAR(4)), - GRN_TEXT_LEN(VAR(4)), &format.columns); - format.flags = 0 /* GRN_OBJ_FORMAT_WITH_COLUMN_NAMES */; - GRN_OUTPUT_OBJ(&obj, &format); - GRN_OBJ_FORMAT_FIN(ctx, &format); - } - } else { - ERR(GRN_INVALID_ARGUMENT, - "nonexistent table name: <%.*s>", table_name_len, table_name); - } - return NULL; -} - -static grn_rc -command_get_resolve_parameters(grn_ctx *ctx, grn_user_data *user_data, - grn_obj **table, grn_id *id) -{ - const char *table_text, *id_text, *key_text; - int table_length, id_length, key_length; - - table_text = GRN_TEXT_VALUE(VAR(0)); - table_length = GRN_TEXT_LEN(VAR(0)); - if (table_length == 0) { - ERR(GRN_INVALID_ARGUMENT, "[table][get] table isn't specified"); - return ctx->rc; - } 
- - *table = grn_ctx_get(ctx, table_text, table_length); - if (!*table) { - ERR(GRN_INVALID_ARGUMENT, - "[table][get] table doesn't exist: <%.*s>", table_length, table_text); - return ctx->rc; - } - - key_text = GRN_TEXT_VALUE(VAR(1)); - key_length = GRN_TEXT_LEN(VAR(1)); - id_text = GRN_TEXT_VALUE(VAR(3)); - id_length = GRN_TEXT_LEN(VAR(3)); - switch ((*table)->header.type) { - case GRN_TABLE_NO_KEY: - if (key_length) { - ERR(GRN_INVALID_ARGUMENT, - "[table][get] should not specify key for NO_KEY table: <%.*s>: " - "table: <%.*s>", - key_length, key_text, - table_length, table_text); - return ctx->rc; - } - if (id_length) { - const char *rest = NULL; - *id = grn_atoi(id_text, id_text + id_length, &rest); - if (rest == id_text) { - ERR(GRN_INVALID_ARGUMENT, - "[table][get] ID should be a number: <%.*s>: table: <%.*s>", - id_length, id_text, - table_length, table_text); - } - } else { - ERR(GRN_INVALID_ARGUMENT, - "[table][get] ID isn't specified: table: <%.*s>", - table_length, table_text); - } - break; - case GRN_TABLE_HASH_KEY: - case GRN_TABLE_PAT_KEY: - case GRN_TABLE_DAT_KEY: - if (key_length && id_length) { - ERR(GRN_INVALID_ARGUMENT, - "[table][get] should not specify both key and ID: " - "key: <%.*s>: ID: <%.*s>: table: <%.*s>", - key_length, key_text, - id_length, id_text, - table_length, table_text); - return ctx->rc; - } - if (key_length) { - *id = grn_table_get(ctx, *table, key_text, key_length); - if (!*id) { - ERR(GRN_INVALID_ARGUMENT, - "[table][get] nonexistent key: <%.*s>: table: <%.*s>", - key_length, key_text, - table_length, table_text); - } - } else { - if (id_length) { - const char *rest = NULL; - *id = grn_atoi(id_text, id_text + id_length, &rest); - if (rest == id_text) { - ERR(GRN_INVALID_ARGUMENT, - "[table][get] ID should be a number: <%.*s>: table: <%.*s>", - id_length, id_text, - table_length, table_text); - } - } else { - ERR(GRN_INVALID_ARGUMENT, - "[table][get] key nor ID isn't specified: table: <%.*s>", - table_length, table_text); 
- } - } - break; - default: - ERR(GRN_INVALID_ARGUMENT, - "[table][get] not a table: <%.*s>", table_length, table_text); - break; - } - - return ctx->rc; -} - -static grn_obj * -command_get(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data) -{ - grn_id id = GRN_ID_NIL; - grn_obj *table = NULL; - if (!command_get_resolve_parameters(ctx, user_data, &table, &id)) { - grn_obj obj; - grn_obj_format format; - GRN_OUTPUT_ARRAY_OPEN("RESULT", 2); - GRN_RECORD_INIT(&obj, 0, ((grn_db_obj *)table)->id); - GRN_OBJ_FORMAT_INIT(&format, 1, 0, 1, 0); - GRN_RECORD_SET(ctx, &obj, id); - grn_obj_columns(ctx, table, GRN_TEXT_VALUE(VAR(2)), GRN_TEXT_LEN(VAR(2)), - &format.columns); - format.flags = - GRN_OBJ_FORMAT_WITH_COLUMN_NAMES | - GRN_OBJ_FORMAT_XML_ELEMENT_RESULTSET; - GRN_OUTPUT_OBJ(&obj, &format); - GRN_OBJ_FORMAT_FIN(ctx, &format); - GRN_OUTPUT_ARRAY_CLOSE(); - } - return NULL; -} - -static grn_obj * -command_push(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data) -{ - grn_obj *table = grn_ctx_get(ctx, GRN_TEXT_VALUE(VAR(0)), GRN_TEXT_LEN(VAR(0))); - if (table) { - switch (table->header.type) { - case GRN_TABLE_NO_KEY: - { - grn_array *array = (grn_array *)table; - grn_table_queue *queue = grn_array_queue(ctx, array); - if (queue) { - MUTEX_LOCK(queue->mutex); - if (grn_table_queue_head(queue) == queue->cap) { - grn_array_clear_curr_rec(ctx, array); - } - grn_load_(ctx, GRN_CONTENT_JSON, - GRN_TEXT_VALUE(VAR(0)), GRN_TEXT_LEN(VAR(0)), - NULL, 0, - GRN_TEXT_VALUE(VAR(1)), GRN_TEXT_LEN(VAR(1)), - NULL, 0, NULL, 0, 0); - if (grn_table_queue_size(queue) == queue->cap) { - grn_table_queue_tail_increment(queue); - } - grn_table_queue_head_increment(queue); - COND_SIGNAL(queue->cond); - MUTEX_UNLOCK(queue->mutex); - GRN_OUTPUT_BOOL(ctx->impl->loader.nrecords); - if (ctx->impl->loader.table) { - grn_db_touch(ctx, DB_OBJ(ctx->impl->loader.table)->db); - } - } else { - ERR(GRN_OPERATION_NOT_SUPPORTED, "table '%.*s' doesn't support push", - 
(int)GRN_TEXT_LEN(VAR(0)), GRN_TEXT_VALUE(VAR(0))); - } - } - break; - default : - ERR(GRN_OPERATION_NOT_SUPPORTED, "table '%.*s' doesn't support push", - (int)GRN_TEXT_LEN(VAR(0)), GRN_TEXT_VALUE(VAR(0))); - } - } else { - ERR(GRN_INVALID_ARGUMENT, "table '%.*s' does not exist.", - (int)GRN_TEXT_LEN(VAR(0)), GRN_TEXT_VALUE(VAR(0))); - } - return NULL; -} - -static grn_obj * -command_pull(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data) -{ - grn_obj *table = grn_ctx_get(ctx, GRN_TEXT_VALUE(VAR(0)), GRN_TEXT_LEN(VAR(0))); - if (table) { - switch (table->header.type) { - case GRN_TABLE_NO_KEY: - { - grn_array *array = (grn_array *)table; - grn_table_queue *queue = grn_array_queue(ctx, array); - if (queue) { - MUTEX_LOCK(queue->mutex); - while (grn_table_queue_size(queue) == 0) { - if (GRN_TEXT_LEN(VAR(2))) { - MUTEX_UNLOCK(queue->mutex); - GRN_OUTPUT_BOOL(0); - return NULL; - } - COND_WAIT(queue->cond, queue->mutex); - } - grn_table_queue_tail_increment(queue); - { - grn_obj obj; - grn_obj_format format; - GRN_RECORD_INIT(&obj, 0, ((grn_db_obj *)table)->id); - GRN_OBJ_FORMAT_INIT(&format, 1, 0, 1, 0); - GRN_RECORD_SET(ctx, &obj, grn_table_queue_tail(queue)); - grn_obj_columns(ctx, table, GRN_TEXT_VALUE(VAR(1)), GRN_TEXT_LEN(VAR(1)), - &format.columns); - format.flags = 0 /* GRN_OBJ_FORMAT_WITH_COLUMN_NAMES */; - GRN_OUTPUT_OBJ(&obj, &format); - GRN_OBJ_FORMAT_FIN(ctx, &format); - } - MUTEX_UNLOCK(queue->mutex); - } else { - ERR(GRN_OPERATION_NOT_SUPPORTED, "table '%.*s' doesn't support pull", - (int)GRN_TEXT_LEN(VAR(0)), GRN_TEXT_VALUE(VAR(0))); - } - } - break; - default : - ERR(GRN_OPERATION_NOT_SUPPORTED, "table '%.*s' doesn't support pull", - (int)GRN_TEXT_LEN(VAR(0)), GRN_TEXT_VALUE(VAR(0))); - } - } else { - ERR(GRN_INVALID_ARGUMENT, "table '%.*s' does not exist.", - (int)GRN_TEXT_LEN(VAR(0)), GRN_TEXT_VALUE(VAR(0))); - } - return NULL; -} - -grn_rc -GRN_PLUGIN_INIT(grn_ctx *ctx) -{ - return GRN_SUCCESS; -} - -grn_rc 
-GRN_PLUGIN_REGISTER(grn_ctx *ctx) -{ - grn_expr_var vars[18]; - - grn_plugin_expr_var_init(ctx, &vars[0], "table", -1); - grn_plugin_expr_var_init(ctx, &vars[1], "expression", -1); - grn_plugin_expr_var_init(ctx, &vars[2], "result_set", -1); - grn_plugin_expr_var_init(ctx, &vars[3], "set_operation", -1); - grn_plugin_expr_var_init(ctx, &vars[4], "allow_update", -1); - grn_plugin_command_create(ctx, "filter_by_script", -1, command_filter_by_script, 5, vars); - - grn_plugin_expr_var_init(ctx, &vars[0], "table", -1); - grn_plugin_expr_var_init(ctx, &vars[1], "column", -1); - grn_plugin_expr_var_init(ctx, &vars[2], "operator", -1); - grn_plugin_expr_var_init(ctx, &vars[3], "value", -1); - grn_plugin_expr_var_init(ctx, &vars[4], "result_set", -1); - grn_plugin_expr_var_init(ctx, &vars[5], "set_operation", -1); - grn_plugin_command_create(ctx, "filter", -1, command_filter, 6, vars); - - grn_plugin_expr_var_init(ctx, &vars[0], "table", -1); - grn_plugin_expr_var_init(ctx, &vars[1], "key", -1); - grn_plugin_expr_var_init(ctx, &vars[2], "result_set", -1); - grn_plugin_expr_var_init(ctx, &vars[3], "range_gap", -1); - grn_plugin_command_create(ctx, "group", -1, command_group, 4, vars); - - grn_plugin_expr_var_init(ctx, &vars[0], "table", -1); - grn_plugin_expr_var_init(ctx, &vars[1], "keys", -1); - grn_plugin_expr_var_init(ctx, &vars[2], "offset", -1); - grn_plugin_expr_var_init(ctx, &vars[3], "limit", -1); - grn_plugin_command_create(ctx, "sort", -1, command_sort, 4, vars); - - grn_plugin_expr_var_init(ctx, &vars[0], "table", -1); - grn_plugin_expr_var_init(ctx, &vars[1], "columns", -1); - grn_plugin_expr_var_init(ctx, &vars[2], "offset", -1); - grn_plugin_expr_var_init(ctx, &vars[3], "limit", -1); - grn_plugin_command_create(ctx, "output", -1, command_output, 4, vars); - - grn_plugin_expr_var_init(ctx, &vars[0], "table", -1); - grn_plugin_expr_var_init(ctx, &vars[1], "expression", -1); - grn_plugin_command_create(ctx, "each", -1, command_each, 2, vars); - - 
grn_plugin_expr_var_init(ctx, &vars[0], "table", -1); - grn_plugin_command_create(ctx, "unlink", -1, command_unlink, 1, vars); - - grn_plugin_expr_var_init(ctx, &vars[0], "table", -1); - grn_plugin_expr_var_init(ctx, &vars[1], "values", -1); - grn_plugin_expr_var_init(ctx, &vars[2], "key", -1); - grn_plugin_expr_var_init(ctx, &vars[3], "columns", -1); - grn_plugin_expr_var_init(ctx, &vars[4], "output_columns", -1); - grn_plugin_expr_var_init(ctx, &vars[5], "id", -1); - grn_plugin_command_create(ctx, "add", -1, command_add, 2, vars); - grn_plugin_command_create(ctx, "push", -1, command_push, 2, vars); - grn_plugin_command_create(ctx, "set", -1, command_set, 6, vars); - - grn_plugin_expr_var_init(ctx, &vars[0], "table", -1); - grn_plugin_expr_var_init(ctx, &vars[1], "key", -1); - grn_plugin_expr_var_init(ctx, &vars[2], "output_columns", -1); - grn_plugin_expr_var_init(ctx, &vars[3], "id", -1); - grn_plugin_command_create(ctx, "get", -1, command_get, 4, vars); - - grn_plugin_expr_var_init(ctx, &vars[0], "table", -1); - grn_plugin_expr_var_init(ctx, &vars[1], "output_columns", -1); - grn_plugin_expr_var_init(ctx, &vars[2], "non_block", -1); - grn_plugin_command_create(ctx, "pull", -1, command_pull, 3, vars); - - grn_plugin_expr_var_init(ctx, &vars[0], "table", -1); - grn_plugin_expr_var_init(ctx, &vars[1], "columns", -1); - grn_plugin_expr_var_init(ctx, &vars[2], "query", -1); - grn_plugin_expr_var_init(ctx, &vars[3], "result_set", -1); - grn_plugin_expr_var_init(ctx, &vars[4], "set_operation", -1); - grn_plugin_expr_var_init(ctx, &vars[5], "allow_column_expression", -1); - grn_plugin_expr_var_init(ctx, &vars[6], "allow_pragma", -1); - grn_plugin_command_create(ctx, "match", -1, command_match, 7, vars); - - return ctx->rc; -} - -grn_rc -GRN_PLUGIN_FIN(grn_ctx *ctx) -{ - return GRN_SUCCESS; -} diff --git a/storage/mroonga/vendor/groonga/plugins/token_filters/stem.c b/storage/mroonga/vendor/groonga/plugins/token_filters/stem.c index 63e640d5ea2..e918ed8a1e5 100644 --- 
a/storage/mroonga/vendor/groonga/plugins/token_filters/stem.c +++ b/storage/mroonga/vendor/groonga/plugins/token_filters/stem.c @@ -92,15 +92,15 @@ is_stemmable(grn_obj *data, grn_bool *is_all_upper) end = current + GRN_TEXT_LEN(data); for (; current < end; current++) { - if (islower(*current)) { + if (islower((unsigned char)*current)) { have_lower = GRN_TRUE; continue; } - if (isupper(*current)) { + if (isupper((unsigned char)*current)) { have_upper = GRN_TRUE; continue; } - if (isdigit(*current)) { + if (isdigit((unsigned char)*current)) { continue; } switch (*current) { @@ -131,11 +131,11 @@ normalize(grn_ctx *ctx, end = current + length; for (; current < end; current++) { - if (isupper(*current)) { + if (isupper((unsigned char)*current)) { if (current > unwritten) { GRN_TEXT_PUT(ctx, normalized, unwritten, current - unwritten); } - GRN_TEXT_PUTC(ctx, normalized, tolower(*current)); + GRN_TEXT_PUTC(ctx, normalized, tolower((unsigned char)*current)); unwritten = current + 1; } } @@ -157,11 +157,11 @@ unnormalize(grn_ctx *ctx, end = current + length; for (; current < end; current++) { - if (islower(*current)) { + if (islower((unsigned char)*current)) { if (current > unwritten) { GRN_TEXT_PUT(ctx, normalized, unwritten, current - unwritten); } - GRN_TEXT_PUTC(ctx, normalized, toupper(*current)); + GRN_TEXT_PUTC(ctx, normalized, toupper((unsigned char)*current)); unwritten = current + 1; } } diff --git a/storage/mroonga/vendor/groonga/plugins/tokenizers/CMakeLists.txt b/storage/mroonga/vendor/groonga/plugins/tokenizers/CMakeLists.txt index 9209b44d36f..8eec25d683c 100644 --- a/storage/mroonga/vendor/groonga/plugins/tokenizers/CMakeLists.txt +++ b/storage/mroonga/vendor/groonga/plugins/tokenizers/CMakeLists.txt @@ -22,6 +22,17 @@ if(GRN_WITH_MECAB) read_file_list(${CMAKE_CURRENT_SOURCE_DIR}/mecab_sources.am MECAB_SOURCES) include_directories(${MECAB_INCLUDE_DIRS}) link_directories(${MECAB_LIBRARY_DIRS}) + if(GRN_WITH_BUNDLED_MECAB) + 
set(GRN_BUNDLED_MECAB_RELATIVE_RC_PATH "${CONFIG_DIR}/mecabrc") + set(MECAB_COMPILE_DEFINITIONS + "GRN_WITH_BUNDLED_MECAB" + "GRN_BUNDLED_MECAB_RELATIVE_RC_PATH=\"${GRN_BUNDLED_MECAB_RELATIVE_RC_PATH}\"" + "GRN_BUNDLED_MECAB_RC_PATH=\"${CMAKE_INSTALL_PREFIX}/${GRN_BUNDLED_MECAB_RELATIVE_RC_PATH}\"") + set_source_files_properties(${MECAB_SOURCES} + PROPERTIES + COMPILE_DEFINITIONS + "${MECAB_COMPILE_DEFINITIONS}") + endif() set_source_files_properties(${MECAB_SOURCES} PROPERTIES COMPILE_FLAGS "${GRN_C_COMPILE_FLAGS}") diff --git a/storage/mroonga/vendor/groonga/plugins/tokenizers/Makefile.am b/storage/mroonga/vendor/groonga/plugins/tokenizers/Makefile.am index 386b1554f73..9e10612baaa 100644 --- a/storage/mroonga/vendor/groonga/plugins/tokenizers/Makefile.am +++ b/storage/mroonga/vendor/groonga/plugins/tokenizers/Makefile.am @@ -14,12 +14,12 @@ AM_LDFLAGS = \ LIBS = \ $(top_builddir)/lib/libgroonga.la -tokenizers_plugins_LTLIBRARIES = +tokenizer_plugins_LTLIBRARIES = if WITH_MECAB -tokenizers_plugins_LTLIBRARIES += mecab.la +tokenizer_plugins_LTLIBRARIES += mecab.la endif if WITH_KYTEA -tokenizers_plugins_LTLIBRARIES += kytea.la +tokenizer_plugins_LTLIBRARIES += kytea.la endif include mecab_sources.am diff --git a/storage/mroonga/vendor/groonga/plugins/tokenizers/mecab.c b/storage/mroonga/vendor/groonga/plugins/tokenizers/mecab.c index bade2f9d3de..eeb027acb92 100644 --- a/storage/mroonga/vendor/groonga/plugins/tokenizers/mecab.c +++ b/storage/mroonga/vendor/groonga/plugins/tokenizers/mecab.c @@ -1,5 +1,6 @@ /* -*- c-basic-offset: 2 -*- */ -/* Copyright(C) 2009-2015 Brazil +/* + Copyright(C) 2009-2016 Brazil This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public @@ -64,14 +65,14 @@ mecab_global_error_message(void) static grn_encoding translate_mecab_charset_to_grn_encoding(const char *charset) { - if (strcasecmp(charset, "euc-jp") == 0) { + if (grn_strcasecmp(charset, "euc-jp") == 0) { return 
GRN_ENC_EUC_JP; - } else if (strcasecmp(charset, "utf-8") == 0 || - strcasecmp(charset, "utf8") == 0) { + } else if (grn_strcasecmp(charset, "utf-8") == 0 || + grn_strcasecmp(charset, "utf8") == 0) { return GRN_ENC_UTF8; - } else if (strcasecmp(charset, "shift_jis") == 0 || - strcasecmp(charset, "shift-jis") == 0 || - strcasecmp(charset, "sjis") == 0) { + } else if (grn_strcasecmp(charset, "shift_jis") == 0 || + grn_strcasecmp(charset, "shift-jis") == 0 || + grn_strcasecmp(charset, "sjis") == 0) { return GRN_ENC_SJIS; } return GRN_ENC_NONE; @@ -169,7 +170,7 @@ chunked_tokenize_utf8_chunk(grn_ctx *ctx, tokenized_chunk_length = strlen(tokenized_chunk); if (tokenized_chunk_length >= 1 && - isspace(tokenized_chunk[tokenized_chunk_length - 1])) { + isspace((unsigned char)tokenized_chunk[tokenized_chunk_length - 1])) { GRN_TEXT_PUT(ctx, &(tokenizer->buf), tokenized_chunk, tokenized_chunk_length - 1); } else { @@ -271,6 +272,65 @@ chunked_tokenize_utf8(grn_ctx *ctx, } } +static mecab_t * +mecab_create(grn_ctx *ctx) +{ + mecab_t *mecab; + int argc = 0; + const char *argv[4]; + + argv[argc++] = "Groonga"; + argv[argc++] = "-Owakati"; +#ifdef GRN_WITH_BUNDLED_MECAB + argv[argc++] = "--rcfile"; +# ifdef WIN32 + { + static char windows_mecab_rc_file[PATH_MAX]; + + grn_strcpy(windows_mecab_rc_file, + PATH_MAX, + grn_plugin_windows_base_dir()); + grn_strcat(windows_mecab_rc_file, + PATH_MAX, + "/"); + grn_strcat(windows_mecab_rc_file, + PATH_MAX, + GRN_BUNDLED_MECAB_RELATIVE_RC_PATH); + { + char *c; + for (c = windows_mecab_rc_file; *c != '\0'; c++) { + if (*c == '/') { + *c = '\\'; + } + } + } + argv[argc++] = windows_mecab_rc_file; + } +# else /* WIN32 */ + argv[argc++] = GRN_BUNDLED_MECAB_RC_PATH; +# endif /* WIN32 */ +#endif /* GRN_WITH_BUNDLED_MECAB */ + mecab = mecab_new(argc, (char **)argv); + + if (!mecab) { +#ifdef GRN_WITH_BUNDLED_MECAB + GRN_PLUGIN_ERROR(ctx, GRN_TOKENIZER_ERROR, + "[tokenizer][mecab] failed to create mecab_t: %s: " + "mecab_new(\"%s\", \"%s\", 
\"%s\", \"%s\")", + mecab_global_error_message(), + argv[0], argv[1], argv[2], argv[3]); +#else /* GRN_WITH_BUNDLED_MECAB */ + GRN_PLUGIN_ERROR(ctx, GRN_TOKENIZER_ERROR, + "[tokenizer][mecab] failed to create mecab_t: %s: " + "mecab_new(\"%s\", \"%s\")", + mecab_global_error_message(), + argv[0], argv[1]); +#endif /* GRN_WITH_BUNDLED_MECAB */ + } + + return mecab; +} + /* This function is called for a full text search query or a document to be indexed. This means that both short/long strings are given. @@ -294,13 +354,8 @@ mecab_init(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data) if (!sole_mecab) { grn_plugin_mutex_lock(ctx, sole_mecab_mutex); if (!sole_mecab) { - sole_mecab = mecab_new2("-Owakati"); - if (!sole_mecab) { - GRN_PLUGIN_ERROR(ctx, GRN_TOKENIZER_ERROR, - "[tokenizer][mecab] " - "mecab_new2() failed on mecab_init(): %s", - mecab_global_error_message()); - } else { + sole_mecab = mecab_create(ctx); + if (sole_mecab) { sole_mecab_encoding = get_mecab_encoding(sole_mecab); } } @@ -479,28 +534,24 @@ check_mecab_dictionary_encoding(grn_ctx *ctx) { #ifdef HAVE_MECAB_DICTIONARY_INFO_T mecab_t *mecab; + grn_encoding encoding; + grn_bool have_same_encoding_dictionary; - mecab = mecab_new2("-Owakati"); - if (mecab) { - grn_encoding encoding; - grn_bool have_same_encoding_dictionary; + mecab = mecab_create(ctx); + if (!mecab) { + return; + } - encoding = GRN_CTX_GET_ENCODING(ctx); - have_same_encoding_dictionary = (encoding == get_mecab_encoding(mecab)); - mecab_destroy(mecab); + encoding = GRN_CTX_GET_ENCODING(ctx); + have_same_encoding_dictionary = (encoding == get_mecab_encoding(mecab)); + mecab_destroy(mecab); - if (!have_same_encoding_dictionary) { - GRN_PLUGIN_ERROR(ctx, GRN_TOKENIZER_ERROR, - "[tokenizer][mecab] " - "MeCab has no dictionary that uses the context encoding" - ": <%s>", - grn_encoding_to_string(encoding)); - } - } else { + if (!have_same_encoding_dictionary) { GRN_PLUGIN_ERROR(ctx, GRN_TOKENIZER_ERROR, "[tokenizer][mecab] " 
- "mecab_new2 failed in check_mecab_dictionary_encoding: %s", - mecab_global_error_message()); + "MeCab has no dictionary that uses the context encoding" + ": <%s>", + grn_encoding_to_string(encoding)); } #endif } @@ -549,6 +600,11 @@ GRN_PLUGIN_INIT(grn_ctx *ctx) } check_mecab_dictionary_encoding(ctx); + if (ctx->rc != GRN_SUCCESS) { + grn_plugin_mutex_close(ctx, sole_mecab_mutex); + sole_mecab_mutex = NULL; + } + return ctx->rc; } |