summaryrefslogtreecommitdiff
path: root/storage/mroonga/vendor/groonga/plugins
diff options
context:
space:
mode:
authorKentoku SHIBA <kentokushiba@gmail.com>2014-09-21 00:33:45 +0900
committerKentoku SHIBA <kentokushiba@gmail.com>2014-09-21 00:33:45 +0900
commit0cc855cdc8cd0baa6ba50662632b299a3843ff13 (patch)
treeeaf50856703412b5c1c43f8c0e5a6a5318601c17 /storage/mroonga/vendor/groonga/plugins
parent989dd4d9ec09450ff7b25987b14ee9fdfd21ad4e (diff)
downloadmariadb-git-0cc855cdc8cd0baa6ba50662632b299a3843ff13.tar.gz
Update Mroonga to the latest version on 2014-09-21T00:33:44+0900
Diffstat (limited to 'storage/mroonga/vendor/groonga/plugins')
-rw-r--r--storage/mroonga/vendor/groonga/plugins/CMakeLists.txt20
-rw-r--r--storage/mroonga/vendor/groonga/plugins/Makefile.am9
-rw-r--r--storage/mroonga/vendor/groonga/plugins/query_expanders/CMakeLists.txt32
-rw-r--r--storage/mroonga/vendor/groonga/plugins/query_expanders/Makefile.am20
-rw-r--r--storage/mroonga/vendor/groonga/plugins/query_expanders/tsv.c299
-rw-r--r--storage/mroonga/vendor/groonga/plugins/query_expanders/tsv_sources.am2
-rw-r--r--storage/mroonga/vendor/groonga/plugins/ruby/CMakeLists.txt44
-rw-r--r--storage/mroonga/vendor/groonga/plugins/ruby/Makefile.am29
-rw-r--r--storage/mroonga/vendor/groonga/plugins/ruby/eval.c64
-rw-r--r--storage/mroonga/vendor/groonga/plugins/ruby/eval_sources.am3
-rw-r--r--storage/mroonga/vendor/groonga/plugins/ruby/load.c63
-rw-r--r--storage/mroonga/vendor/groonga/plugins/ruby/load_sources.am3
-rw-r--r--storage/mroonga/vendor/groonga/plugins/ruby/ruby_plugin.h76
-rw-r--r--storage/mroonga/vendor/groonga/plugins/suggest/CMakeLists.txt29
-rw-r--r--storage/mroonga/vendor/groonga/plugins/suggest/Makefile.am24
-rw-r--r--storage/mroonga/vendor/groonga/plugins/suggest/sources.am2
-rw-r--r--storage/mroonga/vendor/groonga/plugins/suggest/suggest.c1022
-rw-r--r--storage/mroonga/vendor/groonga/plugins/table/CMakeLists.txt29
-rw-r--r--storage/mroonga/vendor/groonga/plugins/table/Makefile.am24
-rw-r--r--storage/mroonga/vendor/groonga/plugins/table/sources.am2
-rw-r--r--storage/mroonga/vendor/groonga/plugins/table/table.c747
-rw-r--r--storage/mroonga/vendor/groonga/plugins/tokenizers/CMakeLists.txt53
-rw-r--r--storage/mroonga/vendor/groonga/plugins/tokenizers/Makefile.am33
-rw-r--r--storage/mroonga/vendor/groonga/plugins/tokenizers/kytea.cpp354
-rw-r--r--storage/mroonga/vendor/groonga/plugins/tokenizers/kytea_sources.am2
-rw-r--r--storage/mroonga/vendor/groonga/plugins/tokenizers/mecab.c338
-rw-r--r--storage/mroonga/vendor/groonga/plugins/tokenizers/mecab_sources.am2
27 files changed, 3325 insertions, 0 deletions
diff --git a/storage/mroonga/vendor/groonga/plugins/CMakeLists.txt b/storage/mroonga/vendor/groonga/plugins/CMakeLists.txt
new file mode 100644
index 00000000000..a2a8f288d52
--- /dev/null
+++ b/storage/mroonga/vendor/groonga/plugins/CMakeLists.txt
@@ -0,0 +1,20 @@
+# Copyright(C) 2012 Brazil
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License version 2.1 as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+
+add_subdirectory(suggest)
+add_subdirectory(tokenizers)
+add_subdirectory(table)
+add_subdirectory(query_expanders)
+add_subdirectory(ruby)
diff --git a/storage/mroonga/vendor/groonga/plugins/Makefile.am b/storage/mroonga/vendor/groonga/plugins/Makefile.am
new file mode 100644
index 00000000000..75a00597e06
--- /dev/null
+++ b/storage/mroonga/vendor/groonga/plugins/Makefile.am
@@ -0,0 +1,9 @@
+SUBDIRS = \
+ tokenizers \
+ suggest \
+ table \
+ query_expanders \
+ ruby
+
+EXTRA_DIST = \
+ CMakeLists.txt
diff --git a/storage/mroonga/vendor/groonga/plugins/query_expanders/CMakeLists.txt b/storage/mroonga/vendor/groonga/plugins/query_expanders/CMakeLists.txt
new file mode 100644
index 00000000000..1e2a2c23b09
--- /dev/null
+++ b/storage/mroonga/vendor/groonga/plugins/query_expanders/CMakeLists.txt
@@ -0,0 +1,32 @@
+# Copyright(C) 2012-2013 Brazil
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License version 2.1 as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+
+include_directories(
+ ${CMAKE_CURRENT_SOURCE_DIR}/../../lib
+ )
+
+set(QUERY_EXPANDERS_DIR "${GRN_RELATIVE_PLUGINS_DIR}/query_expanders")
+read_file_list(${CMAKE_CURRENT_SOURCE_DIR}/tsv_sources.am TSV_SOURCES)
+set_source_files_properties(${TSV_SOURCES}
+ PROPERTIES
+ COMPILE_FLAGS "${GRN_C_COMPILE_FLAGS}")
+add_library(tsv_query_expander MODULE ${TSV_SOURCES})
+set_target_properties(tsv_query_expander PROPERTIES
+ PREFIX ""
+ OUTPUT_NAME "tsv")
+target_link_libraries(tsv_query_expander libgroonga)
+if(NOT MRN_GROONGA_BUNDLED)
+ install(TARGETS tsv_query_expander DESTINATION "${QUERY_EXPANDERS_DIR}")
+endif()
diff --git a/storage/mroonga/vendor/groonga/plugins/query_expanders/Makefile.am b/storage/mroonga/vendor/groonga/plugins/query_expanders/Makefile.am
new file mode 100644
index 00000000000..ca26760d332
--- /dev/null
+++ b/storage/mroonga/vendor/groonga/plugins/query_expanders/Makefile.am
@@ -0,0 +1,20 @@
+EXTRA_DIST = \
+ CMakeLists.txt
+
+AM_CPPFLAGS = \
+ -I$(top_builddir) \
+ -I$(top_srcdir)/include \
+ -I$(top_srcdir)/lib
+
+AM_LDFLAGS = \
+ -avoid-version \
+ -module \
+ -no-undefined
+
+LIBS = \
+ $(top_builddir)/lib/libgroonga.la
+
+query_expanders_plugins_LTLIBRARIES =
+query_expanders_plugins_LTLIBRARIES += tsv.la
+
+include tsv_sources.am
diff --git a/storage/mroonga/vendor/groonga/plugins/query_expanders/tsv.c b/storage/mroonga/vendor/groonga/plugins/query_expanders/tsv.c
new file mode 100644
index 00000000000..6b1fc51d6dc
--- /dev/null
+++ b/storage/mroonga/vendor/groonga/plugins/query_expanders/tsv.c
@@ -0,0 +1,299 @@
+/* -*- c-basic-offset: 2 -*- */
+/* Copyright(C) 2012 Brazil
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License version 2.1 as published by the Free Software Foundation.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+*/
+
+#include <groonga/plugin.h>
+
+/* groonga's internal headers */
+/* for grn_text_fgets(): We don't want to require stdio.h for groonga.h.
+ What should we do? Should we split header file such as groonga/stdio.h? */
+#include <str.h>
+
+#include <stdio.h>
+#include <string.h>
+
+#ifdef HAVE__STRNICMP
+# define strncasecmp(s1,s2,n) _strnicmp(s1,s2,n)
+#endif /* HAVE__STRNICMP */
+
+#define MAX_SYNONYM_BYTES 4096
+
+static grn_hash *synonyms = NULL;
+
+#ifdef WIN32
+static char *win32_synonyms_file = NULL; /* built on first call, then reused */
+/* Returns "<win32 base dir>/<relative synonyms path>"; the relative path alone on OOM. */
+const char *
+get_system_synonyms_file(void)
+{
+  if (!win32_synonyms_file) {
+    const char *relative_path = GRN_QUERY_EXPANDER_TSV_RELATIVE_SYNONYMS_FILE;
+    const char *base_dir = grn_plugin_win32_base_dir();
+    size_t base_dir_length = strlen(base_dir);
+    char *synonyms_file =
+      malloc(base_dir_length + strlen("/") + strlen(relative_path) + 1);
+    if (!synonyms_file) {
+      return relative_path; /* allocation failed: still hand back a usable, non-NULL path */
+    }
+    memcpy(synonyms_file, base_dir, base_dir_length);
+    strcpy(synonyms_file + base_dir_length, "/");
+    strcat(synonyms_file, relative_path);
+    win32_synonyms_file = synonyms_file;
+  }
+  return win32_synonyms_file;
+}
+
+#else /* WIN32 */
+/* Returns the synonyms file path given by GRN_QUERY_EXPANDER_TSV_SYNONYMS_FILE. */
+const char *
+get_system_synonyms_file(void)
+{
+  return GRN_QUERY_EXPANDER_TSV_SYNONYMS_FILE;
+}
+#endif /* WIN32 */
+
+static inline grn_bool
+is_comment_mark(char character)
+{
+  return '#' == character; /* '#' is the only comment marker recognized */
+}
+
+/* Maps the encoding name that follows "coding: " in line to a grn_encoding. */
+static inline grn_encoding
+detect_coding_part(grn_ctx *ctx, const char *line, size_t line_length)
+{
+  static const struct {
+    const char *name;
+    grn_encoding encoding;
+  } known_encodings[] = {
+    {"utf-8", GRN_ENC_UTF8}, {"utf8", GRN_ENC_UTF8}, {"sjis", GRN_ENC_SJIS},
+    {"Shift_JIS", GRN_ENC_SJIS}, {"EUC-JP", GRN_ENC_EUC_JP}, {"euc_jp", GRN_ENC_EUC_JP},
+    {"latin1", GRN_ENC_LATIN1}, {"KOI8-R", GRN_ENC_KOI8R}, {"koi8r", GRN_ENC_KOI8R}
+  };
+  const char *keyword = "coding: ";
+  const char *found;
+  grn_encoding encoding = GRN_ENC_NONE;
+  grn_obj buffer;
+  size_t i;
+
+  /* strstr() needs a NUL-terminated string, so search a terminated copy. */
+  GRN_TEXT_INIT(&buffer, 0);
+  GRN_TEXT_PUT(ctx, &buffer, line, line_length);
+  GRN_TEXT_PUTC(ctx, &buffer, '\0');
+  found = strstr(GRN_TEXT_VALUE(&buffer), keyword);
+  if (!found) {
+    encoding = ctx->encoding; /* no "coding: " keyword: use the context's encoding */
+  } else {
+    const char *name = found + strlen(keyword);
+    /* Case-insensitive prefix match; an unknown name leaves GRN_ENC_NONE. */
+    for (i = 0; i < sizeof(known_encodings) / sizeof(known_encodings[0]); i++) {
+      if (strncasecmp(name, known_encodings[i].name,
+                      strlen(known_encodings[i].name)) == 0) {
+        encoding = known_encodings[i].encoding;
+        break;
+      }
+    }
+  }
+  GRN_OBJ_FIN(ctx, &buffer);
+
+  return encoding;
+}
+
+/* Guesses the encoding from a first line; steps *line past a leading UTF-8 BOM. */
+static inline grn_encoding
+guess_encoding(grn_ctx *ctx, const char **line, size_t *line_length)
+{
+  const unsigned char bom[] = {0xef, 0xbb, 0xbf};
+  size_t bom_length = sizeof(bom);
+
+  if (*line_length >= bom_length && memcmp(*line, bom, bom_length) == 0) {
+    *line += bom_length;
+    *line_length -= bom_length;
+    return GRN_ENC_UTF8;
+  }
+  /* An empty line has no byte to inspect, and (*line_length) - 1 would wrap around. */
+  if (*line_length == 0 || !is_comment_mark((*line)[0])) {
+    return ctx->encoding;
+  }
+  return detect_coding_part(ctx, (*line) + 1, (*line_length) - 1);
+}
+
+/* Parses one "key<TAB>synonym<TAB>..." line and stores the expansion under key. */
+static void
+parse_synonyms_file_line(grn_ctx *ctx, const char *line, int line_length,
+                         grn_obj *key, grn_obj *value)
+{
+  size_t length = line_length > 0 ? (size_t)line_length : 0;
+  size_t i = 0;
+  grn_id id;
+  void *value_location = NULL;
+
+  if (length == 0 || is_comment_mark(line[0])) {
+    return; /* empty line (no line[0] to read) or comment line */
+  }
+
+  while (i < length && line[i] != '\t') { /* key: everything before the first tab */
+    GRN_TEXT_PUTC(ctx, key, line[i]);
+    i++;
+  }
+  if (i + 1 >= length) {
+    return; /* no tab at all, or nothing after the tab */
+  }
+  i++; /* step over the tab that ended the key */
+
+  /* Each remaining tab-separated field becomes one "(...)" alternative. */
+  GRN_TEXT_PUTS(ctx, value, "((");
+  while (i < length) {
+    char character = line[i];
+    i++;
+    if (character == '\t') {
+      GRN_TEXT_PUTS(ctx, value, ") OR (");
+    } else {
+      GRN_TEXT_PUTC(ctx, value, character);
+    }
+  }
+  GRN_TEXT_PUTS(ctx, value, "))");
+
+  id = grn_hash_add(ctx, synonyms, GRN_TEXT_VALUE(key), GRN_TEXT_LEN(key),
+                    &value_location, NULL);
+  if (id == GRN_ID_NIL) {
+    GRN_PLUGIN_LOG(ctx, GRN_LOG_WARNING,
+                   "[plugin][query-expander][tsv] "
+                   "failed to register key: <%.*s>",
+                   (int)GRN_TEXT_LEN(key), GRN_TEXT_VALUE(key));
+    return;
+  }
+
+  /* Clip only oversized values: truncating a shorter one would stretch it to
+     MAX_SYNONYM_BYTES - 1 and put the terminator after stale buffer bytes. */
+  if (GRN_TEXT_LEN(value) > MAX_SYNONYM_BYTES - 1) {
+    grn_bulk_truncate(ctx, value, MAX_SYNONYM_BYTES - 1);
+  }
+  GRN_TEXT_PUTC(ctx, value, '\0');
+  memcpy(value_location, GRN_TEXT_VALUE(value), GRN_TEXT_LEN(value));
+}
+
+/* Reads the synonyms file line by line and feeds each line to the parser. */
+static void
+load_synonyms(grn_ctx *ctx)
+{
+  grn_obj line, key, value;
+  grn_encoding encoding;
+  int number_of_lines = 0;
+  FILE *file;
+  /* The environment variable, when set, wins over the built-in location. */
+  const char *path = getenv("GRN_QUERY_EXPANDER_TSV_SYNONYMS_FILE");
+
+  if (!path) {
+    path = get_system_synonyms_file();
+  }
+  file = fopen(path, "r");
+  if (!file) {
+    GRN_LOG(ctx, GRN_LOG_WARNING,
+            "[plugin][query-expander][tsv] "
+            "synonyms file doesn't exist: <%s>",
+            path);
+    return;
+  }
+
+  GRN_TEXT_INIT(&line, 0);
+  GRN_TEXT_INIT(&key, 0);
+  GRN_TEXT_INIT(&value, 0);
+  grn_bulk_reserve(ctx, &value, MAX_SYNONYM_BYTES);
+  while (grn_text_fgets(ctx, &line, file) == GRN_SUCCESS) {
+    const char *line_value = GRN_TEXT_VALUE(&line);
+    size_t line_length = GRN_TEXT_LEN(&line);
+
+    if (++number_of_lines == 1) {
+      /* May move line_value past a UTF-8 BOM; encoding is not read again in this function. */
+      encoding = guess_encoding(ctx, &line_value, &line_length);
+    }
+    GRN_BULK_REWIND(&key);
+    GRN_BULK_REWIND(&value);
+    parse_synonyms_file_line(ctx, line_value, line_length, &key, &value);
+    GRN_BULK_REWIND(&line);
+  }
+  GRN_OBJ_FIN(ctx, &line);
+  GRN_OBJ_FIN(ctx, &key);
+  GRN_OBJ_FIN(ctx, &value);
+
+  fclose(file);
+}
+
+/* QueryExpanderTSV(term, expanded_term): appends term's stored expansion to expanded_term.
+   Returns an Int32 object holding GRN_SUCCESS on a hit, GRN_END_OF_DATA otherwise. */
+static grn_obj *
+func_query_expander_tsv(grn_ctx *ctx, int nargs, grn_obj **args,
+                        grn_user_data *user_data)
+{
+  grn_rc rc = GRN_END_OF_DATA;
+  grn_obj *rc_object;
+
+  /* args[0]/args[1] exist only when two arguments were passed; synonyms may be NULL. */
+  if (nargs >= 2 && synonyms) {
+    grn_obj *term = args[0];
+    grn_obj *expanded_term = args[1];
+    void *value = NULL;
+    grn_id id = grn_hash_get(ctx, synonyms,
+                             GRN_TEXT_VALUE(term), GRN_TEXT_LEN(term),
+                             &value);
+    if (id != GRN_ID_NIL) {
+      GRN_TEXT_PUTS(ctx, expanded_term, (const char *)value);
+      rc = GRN_SUCCESS;
+    }
+  }
+  rc_object = grn_plugin_proc_alloc(ctx, user_data, GRN_DB_INT32, 0);
+  if (rc_object) {
+    GRN_INT32_SET(ctx, rc_object, rc);
+  }
+  return rc_object;
+}
+
+/* Creates the synonyms hash on first use and loads the synonyms file into it. */
+grn_rc
+GRN_PLUGIN_INIT(grn_ctx *ctx)
+{
+  if (synonyms) {
+    return ctx->rc; /* the hash already exists */
+  }
+  synonyms = grn_hash_create(ctx, NULL,
+                             GRN_TABLE_MAX_KEY_SIZE, MAX_SYNONYM_BYTES,
+                             GRN_OBJ_TABLE_HASH_KEY | GRN_OBJ_KEY_VAR_SIZE);
+  if (synonyms) {
+    load_synonyms(ctx);
+  }
+  return ctx->rc;
+}
+
+/* Registers func_query_expander_tsv as the function proc "QueryExpanderTSV". */
+grn_rc
+GRN_PLUGIN_REGISTER(grn_ctx *ctx)
+{
+  const char *name = "QueryExpanderTSV";
+  grn_proc_create(ctx, name, strlen(name), GRN_PROC_FUNCTION,
+                  func_query_expander_tsv, NULL, NULL, 0, NULL);
+  return GRN_SUCCESS;
+}
+
+grn_rc
+GRN_PLUGIN_FIN(grn_ctx *ctx)
+{
+  if (synonyms != NULL) { /* NULL means no hash is open, so nothing to close */
+    grn_hash_close(ctx, synonyms);
+    synonyms = NULL;
+  }
+  return GRN_SUCCESS;
+}
diff --git a/storage/mroonga/vendor/groonga/plugins/query_expanders/tsv_sources.am b/storage/mroonga/vendor/groonga/plugins/query_expanders/tsv_sources.am
new file mode 100644
index 00000000000..f1bdabede85
--- /dev/null
+++ b/storage/mroonga/vendor/groonga/plugins/query_expanders/tsv_sources.am
@@ -0,0 +1,2 @@
+tsv_la_SOURCES = \
+ tsv.c
diff --git a/storage/mroonga/vendor/groonga/plugins/ruby/CMakeLists.txt b/storage/mroonga/vendor/groonga/plugins/ruby/CMakeLists.txt
new file mode 100644
index 00000000000..d82b154098c
--- /dev/null
+++ b/storage/mroonga/vendor/groonga/plugins/ruby/CMakeLists.txt
@@ -0,0 +1,44 @@
+# Copyright(C) 2013 Brazil
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License version 2.1 as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+
+include_directories(
+ ${CMAKE_CURRENT_SOURCE_DIR}/../../lib
+ ${MRUBY_INCLUDE_DIRS})
+
+if(GRN_WITH_MRUBY)
+ set(GRN_RELATIVE_RUBY_PLUGINS_DIR "${GRN_RELATIVE_PLUGINS_DIR}/ruby")
+
+ read_file_list(${CMAKE_CURRENT_SOURCE_DIR}/eval_sources.am RUBY_EVAL_SOURCES)
+ add_library(eval MODULE ${RUBY_EVAL_SOURCES})
+ set_source_files_properties(${RUBY_EVAL_SOURCES}
+ PROPERTIES
+ COMPILE_FLAGS "${GRN_C_COMPILE_FLAGS}")
+ set_target_properties(eval PROPERTIES PREFIX "")
+ target_link_libraries(eval libgroonga)
+ if(NOT MRN_GROONGA_BUNDLED)
+ install(TARGETS eval DESTINATION "${GRN_RELATIVE_RUBY_PLUGINS_DIR}")
+ endif()
+
+ read_file_list(${CMAKE_CURRENT_SOURCE_DIR}/load_sources.am RUBY_LOAD_SOURCES)
+ add_library(load MODULE ${RUBY_LOAD_SOURCES})
+ set_source_files_properties(${RUBY_LOAD_SOURCES}
+ PROPERTIES
+ COMPILE_FLAGS "${GRN_C_COMPILE_FLAGS}")
+ set_target_properties(load PROPERTIES PREFIX "")
+ target_link_libraries(load libgroonga)
+ if(NOT MRN_GROONGA_BUNDLED)
+ install(TARGETS load DESTINATION "${GRN_RELATIVE_RUBY_PLUGINS_DIR}")
+ endif()
+endif()
diff --git a/storage/mroonga/vendor/groonga/plugins/ruby/Makefile.am b/storage/mroonga/vendor/groonga/plugins/ruby/Makefile.am
new file mode 100644
index 00000000000..381fb47160b
--- /dev/null
+++ b/storage/mroonga/vendor/groonga/plugins/ruby/Makefile.am
@@ -0,0 +1,29 @@
+EXTRA_DIST = \
+ CMakeLists.txt
+
+AM_CFLAGS = \
+ $(MESSAGE_PACK_CFLAGS) \
+ $(MRUBY_CFLAGS)
+
+AM_CPPFLAGS = \
+ -I$(top_builddir) \
+ -I$(top_srcdir)/include \
+ -I$(top_srcdir)/lib
+
+AM_LDFLAGS = \
+ -avoid-version \
+ -module \
+ -no-undefined
+
+LIBS = \
+ $(top_builddir)/lib/libgroonga.la \
+ $(MESSAGE_PACK_LIBS)
+
+if WITH_MRUBY
+ruby_plugins_LTLIBRARIES = \
+ eval.la \
+ load.la
+endif
+
+include eval_sources.am
+include load_sources.am
diff --git a/storage/mroonga/vendor/groonga/plugins/ruby/eval.c b/storage/mroonga/vendor/groonga/plugins/ruby/eval.c
new file mode 100644
index 00000000000..ad1e7948249
--- /dev/null
+++ b/storage/mroonga/vendor/groonga/plugins/ruby/eval.c
@@ -0,0 +1,64 @@
+/* -*- c-basic-offset: 2; indent-tabs-mode: nil -*- */
+/*
+ Copyright(C) 2013 Brazil
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License version 2.1 as published by the Free Software Foundation.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+*/
+
+#include "ruby_plugin.h"
+
+/* ruby_eval command.
+   Runs the "script" argument through grn_mrb_eval() and writes the outcome
+   with output_result(). Always returns NULL. */
+static grn_obj *
+command_ruby_eval(grn_ctx *ctx, int nargs, grn_obj **args,
+                  grn_user_data *user_data)
+{
+  mrb_state *mrb = ctx->impl->mrb.state;
+  grn_obj *script = VAR(0);
+  grn_id domain = script->header.domain;
+  mrb_value result;
+
+  /* Only the three text domains are accepted as a script. */
+  if (domain != GRN_DB_SHORT_TEXT &&
+      domain != GRN_DB_TEXT &&
+      domain != GRN_DB_LONG_TEXT) {
+    grn_obj inspected;
+
+    GRN_TEXT_INIT(&inspected, 0);
+    grn_inspect(ctx, &inspected, script);
+    ERR(GRN_INVALID_ARGUMENT, "script must be a string: <%.*s>",
+        (int)GRN_TEXT_LEN(&inspected), GRN_TEXT_VALUE(&inspected));
+    GRN_OBJ_FIN(ctx, &inspected);
+    return NULL;
+  }
+
+  /* Clear any earlier exception; output_result() branches on mrb->exc. */
+  mrb->exc = NULL;
+  result = grn_mrb_eval(ctx, GRN_TEXT_VALUE(script), GRN_TEXT_LEN(script));
+  output_result(ctx, result);
+
+  return NULL;
+}
+
+/* Registers the "ruby_eval" command with its single "script" argument. */
+grn_rc
+GRN_PLUGIN_REGISTER(grn_ctx *ctx)
+{
+  grn_expr_var script_var;
+  grn_plugin_expr_var_init(ctx, &script_var, "script", -1);
+  grn_plugin_command_create(ctx, "ruby_eval", -1, command_ruby_eval,
+                            1, &script_var);
+  return ctx->rc;
+}
diff --git a/storage/mroonga/vendor/groonga/plugins/ruby/eval_sources.am b/storage/mroonga/vendor/groonga/plugins/ruby/eval_sources.am
new file mode 100644
index 00000000000..08543e43fb4
--- /dev/null
+++ b/storage/mroonga/vendor/groonga/plugins/ruby/eval_sources.am
@@ -0,0 +1,3 @@
+eval_la_SOURCES = \
+ ruby_plugin.h \
+ eval.c
diff --git a/storage/mroonga/vendor/groonga/plugins/ruby/load.c b/storage/mroonga/vendor/groonga/plugins/ruby/load.c
new file mode 100644
index 00000000000..a4e60acc357
--- /dev/null
+++ b/storage/mroonga/vendor/groonga/plugins/ruby/load.c
@@ -0,0 +1,63 @@
+/* -*- c-basic-offset: 2; indent-tabs-mode: nil -*- */
+/*
+ Copyright(C) 2013 Brazil
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License version 2.1 as published by the Free Software Foundation.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+*/
+
+#include "ruby_plugin.h"
+
+/* ruby_load command: passes "path" to grn_mrb_load() and outputs the result. Returns NULL. */
+static grn_obj *
+command_ruby_load(grn_ctx *ctx, int nargs, grn_obj **args,
+                  grn_user_data *user_data)
+{
+  mrb_state *mrb = ctx->impl->mrb.state;
+  grn_obj *path = VAR(0);
+  mrb_value result;
+
+  switch (path->header.domain) {
+  case GRN_DB_SHORT_TEXT :
+  case GRN_DB_TEXT :
+  case GRN_DB_LONG_TEXT :
+    break;
+  default :
+    {
+      grn_obj inspected;
+      GRN_TEXT_INIT(&inspected, 0);
+      grn_inspect(ctx, &inspected, path);
+      ERR(GRN_INVALID_ARGUMENT, "path must be a string: <%.*s>",
+          (int)GRN_TEXT_LEN(&inspected), GRN_TEXT_VALUE(&inspected));
+      GRN_OBJ_FIN(ctx, &inspected);
+      return NULL;
+    }
+  }
+
+  mrb->exc = NULL; /* as ruby_eval does: keep output_result() from reporting a stale exception */
+  GRN_TEXT_PUTC(ctx, path, '\0'); /* grn_mrb_load() is given no length, so terminate the text */
+  result = grn_mrb_load(ctx, GRN_TEXT_VALUE(path));
+  output_result(ctx, result);
+  return NULL;
+}
+
+/* Registers the "ruby_load" command with its single "path" argument. */
+grn_rc
+GRN_PLUGIN_REGISTER(grn_ctx *ctx)
+{
+  grn_expr_var path_var;
+  grn_plugin_expr_var_init(ctx, &path_var, "path", -1);
+  grn_plugin_command_create(ctx, "ruby_load", -1, command_ruby_load,
+                            1, &path_var);
+  return ctx->rc;
+}
diff --git a/storage/mroonga/vendor/groonga/plugins/ruby/load_sources.am b/storage/mroonga/vendor/groonga/plugins/ruby/load_sources.am
new file mode 100644
index 00000000000..d1cce258caa
--- /dev/null
+++ b/storage/mroonga/vendor/groonga/plugins/ruby/load_sources.am
@@ -0,0 +1,3 @@
+load_la_SOURCES = \
+ ruby_plugin.h \
+ load.c
diff --git a/storage/mroonga/vendor/groonga/plugins/ruby/ruby_plugin.h b/storage/mroonga/vendor/groonga/plugins/ruby/ruby_plugin.h
new file mode 100644
index 00000000000..5314ea68fe5
--- /dev/null
+++ b/storage/mroonga/vendor/groonga/plugins/ruby/ruby_plugin.h
@@ -0,0 +1,76 @@
+/* -*- c-basic-offset: 2; indent-tabs-mode: nil -*- */
+/*
+ Copyright(C) 2013 Brazil
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License version 2.1 as published by the Free Software Foundation.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+*/
+
+#include <mrb.h>
+#include <output.h>
+#include <db.h>
+#include <ctx_impl.h>
+#include <util.h>
+
+#include <groonga/plugin.h>
+
+#include <mruby.h>
+
+#define VAR GRN_PROC_GET_VAR_BY_OFFSET
+
+/* Outputs a "result" map: "value" for result, or a nested "exception" map when mrb->exc is set. */
+static void
+output_result(grn_ctx *ctx, mrb_value result)
+{
+  mrb_state *mrb = ctx->impl->mrb.state;
+  grn_obj converted;
+
+  GRN_OUTPUT_MAP_OPEN("result", 1);
+  if (!mrb->exc) {
+    GRN_OUTPUT_CSTR("value");
+    GRN_VOID_INIT(&converted);
+    if (grn_mrb_to_grn(ctx, result, &converted) != GRN_SUCCESS) {
+      GRN_OUTPUT_CSTR("unsupported return value");
+    } else {
+      GRN_OUTPUT_OBJ(&converted, NULL);
+    }
+    grn_obj_unlink(ctx, &converted);
+  } else {
+    mrb_value message;
+    GRN_OUTPUT_CSTR("exception");
+    GRN_OUTPUT_MAP_OPEN("exception", 1);
+    GRN_OUTPUT_CSTR("message");
+    message = mrb_funcall(mrb, mrb_obj_value(mrb->exc), "message", 0);
+    GRN_VOID_INIT(&converted);
+    if (grn_mrb_to_grn(ctx, message, &converted) != GRN_SUCCESS) {
+      GRN_OUTPUT_CSTR("unsupported message type");
+    } else {
+      GRN_OUTPUT_OBJ(&converted, NULL);
+    }
+    grn_obj_unlink(ctx, &converted);
+    GRN_OUTPUT_MAP_CLOSE();
+  }
+  GRN_OUTPUT_MAP_CLOSE();
+}
+
+grn_rc
+GRN_PLUGIN_INIT(grn_ctx *ctx)
+{
+ return GRN_SUCCESS; /* does no work; ctx is not used */
+}
+
+grn_rc
+GRN_PLUGIN_FIN(grn_ctx *ctx)
+{
+ return GRN_SUCCESS; /* does no work; ctx is not used */
+}
diff --git a/storage/mroonga/vendor/groonga/plugins/suggest/CMakeLists.txt b/storage/mroonga/vendor/groonga/plugins/suggest/CMakeLists.txt
new file mode 100644
index 00000000000..72b86362c55
--- /dev/null
+++ b/storage/mroonga/vendor/groonga/plugins/suggest/CMakeLists.txt
@@ -0,0 +1,29 @@
+# Copyright(C) 2012-2013 Brazil
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License version 2.1 as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+
+include_directories(
+ ${CMAKE_CURRENT_SOURCE_DIR}/../../lib
+ ${MRUBY_INCLUDE_DIRS})
+
+read_file_list(${CMAKE_CURRENT_SOURCE_DIR}/sources.am SUGGEST_SOURCES)
+add_library(suggest MODULE ${SUGGEST_SOURCES})
+set_source_files_properties(${SUGGEST_SOURCES}
+ PROPERTIES
+ COMPILE_FLAGS "${GRN_C_COMPILE_FLAGS}")
+set_target_properties(suggest PROPERTIES PREFIX "")
+target_link_libraries(suggest libgroonga)
+if(NOT MRN_GROONGA_BUNDLED)
+ install(TARGETS suggest DESTINATION "${GRN_RELATIVE_PLUGINS_DIR}/suggest")
+endif()
diff --git a/storage/mroonga/vendor/groonga/plugins/suggest/Makefile.am b/storage/mroonga/vendor/groonga/plugins/suggest/Makefile.am
new file mode 100644
index 00000000000..7f321b6c482
--- /dev/null
+++ b/storage/mroonga/vendor/groonga/plugins/suggest/Makefile.am
@@ -0,0 +1,24 @@
+EXTRA_DIST = \
+ CMakeLists.txt
+
+AM_CFLAGS = \
+ $(MESSAGE_PACK_CFLAGS) \
+ $(MRUBY_CFLAGS)
+
+AM_CPPFLAGS = \
+ -I$(top_builddir) \
+ -I$(top_srcdir)/include \
+ -I$(top_srcdir)/lib
+
+AM_LDFLAGS = \
+ -avoid-version \
+ -module \
+ -no-undefined
+
+LIBS = \
+ $(top_builddir)/lib/libgroonga.la \
+ $(MESSAGE_PACK_LIBS)
+
+suggest_plugins_LTLIBRARIES = suggest.la
+
+include sources.am
diff --git a/storage/mroonga/vendor/groonga/plugins/suggest/sources.am b/storage/mroonga/vendor/groonga/plugins/suggest/sources.am
new file mode 100644
index 00000000000..798a431a8e8
--- /dev/null
+++ b/storage/mroonga/vendor/groonga/plugins/suggest/sources.am
@@ -0,0 +1,2 @@
+suggest_la_SOURCES = \
+ suggest.c
diff --git a/storage/mroonga/vendor/groonga/plugins/suggest/suggest.c b/storage/mroonga/vendor/groonga/plugins/suggest/suggest.c
new file mode 100644
index 00000000000..ea7b6adbf6b
--- /dev/null
+++ b/storage/mroonga/vendor/groonga/plugins/suggest/suggest.c
@@ -0,0 +1,1022 @@
+/* -*- c-basic-offset: 2; indent-tabs-mode: nil -*- */
+/* Copyright(C) 2010-2013 Brazil
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License version 2.1 as published by the Free Software Foundation.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+*/
+
+#include "ctx.h"
+#include "db.h"
+#include "ii.h"
+#include "token.h"
+#include "output.h"
+#include <groonga/plugin.h>
+#include <string.h>
+
+/* Where the build defines HAVE__STRNICMP, strncasecmp() is mapped onto
+   _strnicmp(). */
+#ifdef HAVE__STRNICMP
+# define strncasecmp(s1,s2,n) _strnicmp(s1,s2,n)
+#endif /* HAVE__STRNICMP */
+
+/* Shorthand for GRN_PROC_GET_VAR_BY_OFFSET; used below as VAR(n) to read
+   the n-th variable of the running command. */
+#define VAR GRN_PROC_GET_VAR_BY_OFFSET
+/* Expands to TWO arguments: the string and its length without the
+   terminating NUL.  Relies on sizeof, so it is only meaningful for string
+   literals or char arrays, not for char pointers. */
+#define CONST_STR_LEN(x) x, x ? sizeof(x) - 1 : 0
+/* Expands to TWO arguments: the text bulk's data pointer and its length. */
+#define TEXT_VALUE_LEN(x) GRN_TEXT_VALUE(x), GRN_TEXT_LEN(x)
+
+/* 60 seconds in microseconds.  Pair learning stops at the first previous
+   event that is at least this much older than the posted event. */
+#define MIN_LEARN_DISTANCE (60 * GRN_TIME_USEC_PER_SEC)
+
+/* Bit flags for the suggestion types selected by the "types" argument;
+   they can be OR-ed together. */
+#define COMPLETE 1
+#define CORRECT 2
+#define SUGGEST 4
+
+/* Controls the optional extra search step of complete() (prefix search)
+   and correct() (similar search): always run it, never run it, or let the
+   function decide from the results collected so far. */
+typedef enum {
+ GRN_SUGGEST_SEARCH_YES,
+ GRN_SUGGEST_SEARCH_NO,
+ GRN_SUGGEST_SEARCH_AUTO
+} grn_suggest_search_mode;
+
+/* Working state of one suggest_preparer() call. */
+typedef struct {
+ /* Arguments of suggest_preparer(), stored as passed in by learner_init(). */
+ grn_obj *post_event;
+ grn_obj *post_type;
+ grn_obj *post_item;
+ grn_obj *seq;
+ grn_obj *post_time;
+ grn_obj *pairs;
+
+ /* Result of the call: 0 unless learning stopped at a previous event that
+    was at least MIN_LEARN_DISTANCE older, in which case it holds that
+    distance in seconds. */
+ int learn_distance_in_seconds;
+
+ /* Plain values extracted from the argument objects above by
+    learner_init_values(). */
+ grn_id post_event_id;
+ grn_id post_type_id;
+ grn_id post_item_id;
+ grn_id seq_id;
+ int64_t post_time_value;
+
+ /* Tables and columns resolved by learner_init_columns() and released by
+    learner_fin_columns(). */
+ grn_obj *seqs;
+ grn_obj *seqs_events;
+ grn_obj *events;
+ grn_obj *events_item;
+ grn_obj *events_type;
+ grn_obj *events_time;
+ grn_obj *event_types;
+ grn_obj *items;
+ grn_obj *items_freq;
+ grn_obj *items_freq2;
+ grn_obj *items_last;
+ grn_obj *pairs_pre;
+ grn_obj *pairs_post;
+ grn_obj *pairs_freq0;
+ grn_obj *pairs_freq1;
+ grn_obj *pairs_freq2;
+
+ /* Name of the events table with its first strlen("event_") bytes
+    removed; used as the key into the configuration table. */
+ grn_obj dataset_name;
+
+ /* Object named "configuration", or NULL when it does not exist. */
+ grn_obj *configuration;
+
+ /* Amount added by learner_increment(); 1 unless the configuration table
+    supplies a "weight" for the dataset. */
+ grn_obj weight;
+ /* Events already stored for the sequence (value of seqs.events). */
+ grn_obj pre_events;
+
+ /* post_item_id shifted into the upper 32 bits; the lower 32 bits of a
+    pair key are filled with the other item's id. */
+ uint64_t key_prefix;
+ /* Scratch record holding the item of a previous event or token. */
+ grn_obj pre_item;
+} grn_suggest_learner;
+
+/* Parse the "types" argument into a bit set of COMPLETE / CORRECT / SUGGEST.
+ *
+ * The text is scanned left to right.  '|' characters are skipped, the
+ * words "complete", "correct" and "suggest" set the matching flag, and
+ * scanning stops at the first position that is neither.  Flags gathered
+ * up to that point are returned; an empty text yields 0.
+ */
+static int
+grn_parse_suggest_types(grn_obj *text)
+{
+  static const struct {
+    const char *name;
+    size_t length;
+    int flag;
+  } known_types[] = {
+    {"complete", sizeof("complete") - 1, COMPLETE},
+    {"correct", sizeof("correct") - 1, CORRECT},
+    {"suggest", sizeof("suggest") - 1, SUGGEST}
+  };
+  const size_t n_known_types = sizeof(known_types) / sizeof(known_types[0]);
+  const char *cursor = GRN_TEXT_VALUE(text);
+  const char *end = GRN_BULK_CURR(text);
+  int types = 0;
+
+  while (cursor < end) {
+    size_t rest;
+    size_t i;
+
+    if (*cursor == '|') {
+      cursor += 1;
+      continue;
+    }
+    /* Compare lengths instead of computing cursor + length: that pointer
+       could lie beyond one-past-the-end of the buffer, which is undefined
+       behavior even when it is only compared. */
+    rest = (size_t)(end - cursor);
+    for (i = 0; i < n_known_types; i++) {
+      if (known_types[i].length <= rest &&
+          memcmp(cursor, known_types[i].name, known_types[i].length) == 0) {
+        break;
+      }
+    }
+    if (i == n_known_types) {
+      /* unknown word: stop parsing, keep what was recognized so far */
+      break;
+    }
+    types |= known_types[i].flag;
+    cursor += known_types[i].length;
+  }
+  return types;
+}
+
+/* Add the items that co-occur with item `id` to the result set `res`.
+ *
+ * ctx:         groonga context.
+ * items:       item table; must have "co", "freq" and "freq2" columns.
+ * items_boost: "boost" column of `items`; items with a negative boost are
+ *              excluded.
+ * id:          id of the query item; when 0 nothing is done.
+ * res:         result hash keyed by item id; scores are accumulated into
+ *              its grn_rset_recinfo values.
+ * query_type:  exactly one of COMPLETE, CORRECT or SUGGEST; selects the
+ *              pair frequency column (freq0, freq1 or freq2).  Any other
+ *              value makes the function return 0 without searching.
+ * frequency_threshold: minimum pair frequency and item frequency.
+ * conditional_probability_threshold: minimum pair_freq / item_freq2.
+ *
+ * Returns the largest (pair frequency + boost) among accepted items, or 0.
+ * A missing "co" column or an unresolvable pairs table is reported with
+ * GRN_INVALID_ARGUMENT (0 is returned) instead of being dereferenced.
+ */
+static int32_t
+cooccurrence_search(grn_ctx *ctx, grn_obj *items, grn_obj *items_boost, grn_id id,
+                    grn_obj *res, int query_type, int frequency_threshold,
+                    double conditional_probability_threshold)
+{
+  int32_t max_score = 0;
+  if (id) {
+    grn_ii_cursor *c;
+    grn_obj *co, *pairs;
+    grn_obj *items_freq, *items_freq2;
+    grn_obj *pairs_freq, *pairs_post;
+
+    co = grn_obj_column(ctx, items, CONST_STR_LEN("co"));
+    if (!co) {
+      /* `co` is cast to grn_ii and dereferenced below */
+      ERR(GRN_INVALID_ARGUMENT, "nonexistent column: <co>");
+      return max_score;
+    }
+    pairs = grn_ctx_at(ctx, grn_obj_get_range(ctx, co));
+    if (!pairs) {
+      ERR(GRN_INVALID_ARGUMENT, "cannot find pairs table of column: <co>");
+      return max_score;
+    }
+    items_freq = grn_obj_column(ctx, items, CONST_STR_LEN("freq"));
+    items_freq2 = grn_obj_column(ctx, items, CONST_STR_LEN("freq2"));
+    pairs_post = grn_obj_column(ctx, pairs, CONST_STR_LEN("post"));
+    switch (query_type) {
+    case COMPLETE :
+      pairs_freq = grn_obj_column(ctx, pairs, CONST_STR_LEN("freq0"));
+      break;
+    case CORRECT :
+      pairs_freq = grn_obj_column(ctx, pairs, CONST_STR_LEN("freq1"));
+      break;
+    case SUGGEST :
+      pairs_freq = grn_obj_column(ctx, pairs, CONST_STR_LEN("freq2"));
+      break;
+    default :
+      return max_score;
+    }
+    if ((c = grn_ii_cursor_open(ctx, (grn_ii *)co, id, GRN_ID_NIL, GRN_ID_MAX,
+                                ((grn_ii *)co)->n_elements - 1, 0))) {
+      grn_ii_posting *p;
+      grn_obj post, pair_freq, item_freq, item_freq2, item_boost;
+      GRN_RECORD_INIT(&post, 0, grn_obj_id(ctx, items));
+      GRN_INT32_INIT(&pair_freq, 0);
+      GRN_INT32_INIT(&item_freq, 0);
+      GRN_INT32_INIT(&item_freq2, 0);
+      GRN_INT32_INIT(&item_boost, 0);
+      /* each posting is one pair record whose "pre" side is `id` */
+      while ((p = grn_ii_cursor_next(ctx, c))) {
+        grn_id post_id;
+        int pfreq, ifreq, ifreq2, boost;
+        double conditional_probability;
+        GRN_BULK_REWIND(&post);
+        GRN_BULK_REWIND(&pair_freq);
+        GRN_BULK_REWIND(&item_freq);
+        GRN_BULK_REWIND(&item_freq2);
+        GRN_BULK_REWIND(&item_boost);
+        grn_obj_get_value(ctx, pairs_post, p->rid, &post);
+        grn_obj_get_value(ctx, pairs_freq, p->rid, &pair_freq);
+        post_id = GRN_RECORD_VALUE(&post);
+        grn_obj_get_value(ctx, items_freq, post_id, &item_freq);
+        grn_obj_get_value(ctx, items_freq2, post_id, &item_freq2);
+        grn_obj_get_value(ctx, items_boost, post_id, &item_boost);
+        pfreq = GRN_INT32_VALUE(&pair_freq);
+        ifreq = GRN_INT32_VALUE(&item_freq);
+        ifreq2 = GRN_INT32_VALUE(&item_freq2);
+        /* guard the division; an item without freq2 has probability 0 */
+        if (ifreq2 > 0) {
+          conditional_probability = (double)pfreq / (double)ifreq2;
+        } else {
+          conditional_probability = 0.0;
+        }
+        boost = GRN_INT32_VALUE(&item_boost);
+        if (pfreq >= frequency_threshold && ifreq >= frequency_threshold &&
+            conditional_probability >= conditional_probability_threshold &&
+            boost >= 0) {
+          grn_rset_recinfo *ri;
+          void *value;
+          int32_t score = pfreq;
+          int added;
+          if (max_score < score + boost) { max_score = score + boost; }
+          /* put any formula if desired */
+          if (grn_hash_add(ctx, (grn_hash *)res,
+                           &post_id, sizeof(grn_id), &value, &added)) {
+            ri = value;
+            ri->score += score;
+            /* the boost is counted once per item, on first insertion */
+            if (added) {
+              ri->score += boost;
+            }
+          }
+        }
+      }
+      GRN_OBJ_FIN(ctx, &post);
+      GRN_OBJ_FIN(ctx, &pair_freq);
+      GRN_OBJ_FIN(ctx, &item_freq);
+      GRN_OBJ_FIN(ctx, &item_freq2);
+      GRN_OBJ_FIN(ctx, &item_boost);
+      grn_ii_cursor_close(ctx, c);
+    }
+  }
+  return max_score;
+}
+
+/* Values used by the suggest command when the corresponding argument is
+   empty. */
+#define DEFAULT_LIMIT 10
+#define DEFAULT_SORTBY "-_score"
+#define DEFAULT_OUTPUT_COLUMNS "_key,_score"
+#define DEFAULT_FREQUENCY_THRESHOLD 100
+#define DEFAULT_CONDITIONAL_PROBABILITY_THRESHOLD 0.2
+
+/* Sort the result set `res` and emit the requested window of it.
+ *
+ * sortby / output_columns: text bulks; when one is empty DEFAULT_SORTBY or
+ *                          DEFAULT_OUTPUT_COLUMNS is used in its place.
+ * offset / limit:          window passed to grn_table_sort() and to the
+ *                          output format.
+ * table and tid are accepted but not used by this function.
+ *
+ * Raises GRN_UNKNOWN_ERROR when the temporary sort table cannot be
+ * created.  When the sort keys cannot be built nothing is emitted.
+ */
+static void
+output(grn_ctx *ctx, grn_obj *table, grn_obj *res, grn_id tid,
+ grn_obj *sortby, grn_obj *output_columns, int offset, int limit)
+{
+ grn_obj *sorted;
+ if ((sorted = grn_table_create(ctx, NULL, 0, NULL, GRN_OBJ_TABLE_NO_KEY, NULL, res))) {
+ uint32_t nkeys;
+ grn_obj_format format;
+ grn_table_sort_key *keys;
+ const char *sortby_val = GRN_TEXT_VALUE(sortby);
+ unsigned int sortby_len = GRN_TEXT_LEN(sortby);
+ const char *oc_val = GRN_TEXT_VALUE(output_columns);
+ unsigned int oc_len = GRN_TEXT_LEN(output_columns);
+ /* fall back to the defaults for empty arguments */
+ if (!sortby_val || !sortby_len) {
+ sortby_val = DEFAULT_SORTBY;
+ sortby_len = sizeof(DEFAULT_SORTBY) - 1;
+ }
+ if (!oc_val || !oc_len) {
+ oc_val = DEFAULT_OUTPUT_COLUMNS;
+ oc_len = sizeof(DEFAULT_OUTPUT_COLUMNS) - 1;
+ }
+ if ((keys = grn_table_sort_key_from_str(ctx, sortby_val, sortby_len, res, &nkeys))) {
+ grn_table_sort(ctx, res, offset, limit, sorted, keys, nkeys);
+ GRN_QUERY_LOG(ctx, GRN_QUERY_LOG_SIZE,
+ ":", "sort(%d)", limit);
+ /* the format is given the size of the unsorted result set */
+ GRN_OBJ_FORMAT_INIT(&format, grn_table_size(ctx, res), 0, limit, offset);
+ format.flags =
+ GRN_OBJ_FORMAT_WITH_COLUMN_NAMES|
+ GRN_OBJ_FORMAT_XML_ELEMENT_RESULTSET;
+ grn_obj_columns(ctx, sorted, oc_val, oc_len, &format.columns);
+ GRN_OUTPUT_OBJ(sorted, &format);
+ GRN_OBJ_FORMAT_FIN(ctx, &format);
+ grn_table_sort_key_close(ctx, keys, nkeys);
+ }
+ grn_obj_unlink(ctx, sorted);
+ } else {
+ ERR(GRN_UNKNOWN_ERROR, "cannot create temporary sort table.");
+ }
+}
+
+/* Score item `id` for completion and add it to the result set `res`.
+ *
+ * The score is 1 + freq + boost, read from `items_freq` / `items_boost`
+ * into the caller-provided scratch bulks `item_freq` / `item_boost`.
+ * Items with a negative boost, or with a score below
+ * `frequency_threshold`, are left out.  For an accepted item the score is
+ * added to the record's accumulated score.
+ */
+static inline void
+complete_add_item(grn_ctx *ctx, grn_id id, grn_obj *res, int frequency_threshold,
+                  grn_obj *items_freq, grn_obj *items_boost,
+                  grn_obj *item_freq, grn_obj *item_boost)
+{
+  int32_t score;
+  void *record;
+
+  GRN_BULK_REWIND(item_freq);
+  GRN_BULK_REWIND(item_boost);
+  grn_obj_get_value(ctx, items_freq, id, item_freq);
+  grn_obj_get_value(ctx, items_boost, id, item_boost);
+
+  /* a negative boost excludes the item */
+  if (GRN_INT32_VALUE(item_boost) < 0) {
+    return;
+  }
+
+  score = 1 +
+    GRN_INT32_VALUE(item_freq) +
+    GRN_INT32_VALUE(item_boost);
+  if (score < frequency_threshold) {
+    return;
+  }
+
+  if (grn_hash_add(ctx, (grn_hash *)res, &id, sizeof(grn_id),
+                   &record, NULL)) {
+    grn_rset_recinfo *info = record;
+    info->score += score;
+  }
+}
+
+/* Run the "complete" search for `query` and emit the result via output().
+ *
+ * Candidates are gathered into a temporary hash keyed by item id, in up to
+ * three steps (all skipped when the query is empty):
+ *   1. RK + prefix cursor over the lexicon of the prefix index found for
+ *      `col`; every item posted under a matching term is scored with
+ *      complete_add_item().
+ *   2. cooccurrence_search() with COMPLETE for the item whose key equals
+ *      the query.
+ *   3. plain prefix search on the keys of `items`, when
+ *      prefix_search_mode is YES, or AUTO and nothing was found so far.
+ *
+ * Raises GRN_UNKNOWN_ERROR when the temporary table, the index or the RK
+ * cursor is unavailable; in the latter two cases the remaining steps still
+ * run.
+ */
+static void
+complete(grn_ctx *ctx, grn_obj *items, grn_obj *items_boost, grn_obj *col,
+ grn_obj *query, grn_obj *sortby,
+ grn_obj *output_columns, int offset, int limit,
+ int frequency_threshold, double conditional_probability_threshold,
+ grn_suggest_search_mode prefix_search_mode)
+{
+ grn_obj *res;
+ grn_obj *items_freq = grn_obj_column(ctx, items, CONST_STR_LEN("freq"));
+ /* scratch bulks shared by all complete_add_item() calls */
+ grn_obj item_freq, item_boost;
+ GRN_INT32_INIT(&item_freq, 0);
+ GRN_INT32_INIT(&item_boost, 0);
+ if ((res = grn_table_create(ctx, NULL, 0, NULL,
+ GRN_TABLE_HASH_KEY|GRN_OBJ_WITH_SUBREC, items, NULL))) {
+ /* id of the item whose key is exactly the query, if there is one */
+ grn_id tid = grn_table_get(ctx, items, TEXT_VALUE_LEN(query));
+ if (GRN_TEXT_LEN(query)) {
+ grn_table_cursor *cur;
+ /* RK search + prefix search */
+ grn_obj *index;
+ /* FIXME: support index selection */
+ if (grn_column_index(ctx, col, GRN_OP_PREFIX, &index, 1, NULL)) {
+ if ((cur = grn_table_cursor_open(ctx, grn_ctx_at(ctx, index->header.domain),
+ GRN_TEXT_VALUE(query),
+ GRN_TEXT_LEN(query),
+ NULL, 0, 0, -1,
+ GRN_CURSOR_PREFIX|GRN_CURSOR_RK))) {
+ grn_id id;
+ /* for every matching term, add all items posted under it */
+ while ((id = grn_table_cursor_next(ctx, cur))) {
+ grn_ii_cursor *icur;
+ if ((icur = grn_ii_cursor_open(ctx, (grn_ii *)index, id,
+ GRN_ID_NIL, GRN_ID_MAX, 1, 0))) {
+ grn_ii_posting *p;
+ while ((p = grn_ii_cursor_next(ctx, icur))) {
+ complete_add_item(ctx, p->rid, res, frequency_threshold,
+ items_freq, items_boost,
+ &item_freq, &item_boost);
+ }
+ grn_ii_cursor_close(ctx, icur);
+ }
+ }
+ grn_table_cursor_close(ctx, cur);
+ } else {
+ ERR(GRN_UNKNOWN_ERROR, "cannot open cursor for prefix RK search.");
+ }
+ } else {
+ ERR(GRN_UNKNOWN_ERROR, "cannot find index for prefix RK search.");
+ }
+ /* does nothing when tid is 0 */
+ cooccurrence_search(ctx, items, items_boost, tid, res, COMPLETE,
+ frequency_threshold,
+ conditional_probability_threshold);
+ /* optional plain prefix search on the item keys; the cursor is only
+    opened when the mode condition holds */
+ if (((prefix_search_mode == GRN_SUGGEST_SEARCH_YES) ||
+ (prefix_search_mode == GRN_SUGGEST_SEARCH_AUTO &&
+ !grn_table_size(ctx, res))) &&
+ (cur = grn_table_cursor_open(ctx, items,
+ GRN_TEXT_VALUE(query),
+ GRN_TEXT_LEN(query),
+ NULL, 0, 0, -1, GRN_CURSOR_PREFIX))) {
+ grn_id id;
+ while ((id = grn_table_cursor_next(ctx, cur))) {
+ complete_add_item(ctx, id, res, frequency_threshold,
+ items_freq, items_boost, &item_freq, &item_boost);
+ }
+ grn_table_cursor_close(ctx, cur);
+ }
+ }
+ /* an empty query still emits an (empty) result set */
+ output(ctx, items, res, tid, sortby, output_columns, offset, limit);
+ grn_obj_close(ctx, res);
+ } else {
+ ERR(GRN_UNKNOWN_ERROR, "cannot create temporary table.");
+ }
+ GRN_OBJ_FIN(ctx, &item_boost);
+ GRN_OBJ_FIN(ctx, &item_freq);
+}
+
+/* Run the "correct" search for `query` and emit the result via output().
+ *
+ * First cooccurrence_search() with CORRECT collects corrections learned
+ * for the item whose key equals the query.  Then, for a non-empty query,
+ * a similar search is added when similar_search_mode is YES, or AUTO and
+ * the best co-occurrence score is below `frequency_threshold`:
+ *   1. grn_ii_select() in GRN_OP_SIMILAR mode over the match index of the
+ *      items' _key merges similar items into the result set;
+ *   2. every record gets 1 + (freq2 >> 4) + boost added to its score;
+ *      records with a negative boost, or whose added amount is below the
+ *      threshold, are deleted;
+ *   3. edit_distance(_key, query) is subtracted from each remaining score
+ *      and records that fall below the threshold are deleted.
+ *
+ * Raises GRN_UNKNOWN_ERROR when the temporary table or the edit-distance
+ * expression cannot be created.
+ */
+static void
+correct(grn_ctx *ctx, grn_obj *items, grn_obj *items_boost,
+ grn_obj *query, grn_obj *sortby,
+ grn_obj *output_columns, int offset, int limit,
+ int frequency_threshold, double conditional_probability_threshold,
+ grn_suggest_search_mode similar_search_mode)
+{
+ grn_obj *res;
+ grn_obj *items_freq2 = grn_obj_column(ctx, items, CONST_STR_LEN("freq2"));
+ /* scratch bulks reused for every record in step 2 */
+ grn_obj item_freq2, item_boost;
+ GRN_INT32_INIT(&item_freq2, 0);
+ GRN_INT32_INIT(&item_boost, 0);
+ if ((res = grn_table_create(ctx, NULL, 0, NULL,
+ GRN_TABLE_HASH_KEY|GRN_OBJ_WITH_SUBREC, items, NULL))) {
+ grn_id tid = grn_table_get(ctx, items, TEXT_VALUE_LEN(query));
+ int32_t max_score;
+ max_score = cooccurrence_search(ctx, items, items_boost, tid, res, CORRECT,
+ frequency_threshold,
+ conditional_probability_threshold);
+ GRN_QUERY_LOG(ctx, GRN_QUERY_LOG_SCORE,
+ ":", "cooccur(%d)", max_score);
+ if (GRN_TEXT_LEN(query) &&
+ ((similar_search_mode == GRN_SUGGEST_SEARCH_YES) ||
+ (similar_search_mode == GRN_SUGGEST_SEARCH_AUTO &&
+ max_score < frequency_threshold))) {
+ grn_obj *key, *index;
+ if ((key = grn_obj_column(ctx, items,
+ GRN_COLUMN_NAME_KEY,
+ GRN_COLUMN_NAME_KEY_LEN))) {
+ if (grn_column_index(ctx, key, GRN_OP_MATCH, &index, 1, NULL)) {
+ /* step 1: merge similar items into res */
+ grn_select_optarg optarg;
+ memset(&optarg, 0, sizeof(grn_select_optarg));
+ optarg.mode = GRN_OP_SIMILAR;
+ optarg.similarity_threshold = 0;
+ optarg.max_size = 2;
+ grn_ii_select(ctx, (grn_ii *)index, TEXT_VALUE_LEN(query),
+ (grn_hash *)res, GRN_OP_OR, &optarg);
+ grn_obj_unlink(ctx, index);
+ GRN_QUERY_LOG(ctx, GRN_QUERY_LOG_SIZE,
+ ":", "similar(%d)", grn_table_size(ctx, res));
+ {
+ /* step 2: add the frequency based score, drop weak records */
+ grn_hash_cursor *hc = grn_hash_cursor_open(ctx, (grn_hash *)res, NULL,
+ 0, NULL, 0, 0, -1, 0);
+ if (hc) {
+ while (grn_hash_cursor_next(ctx, hc)) {
+ /* NOTE: this `key` shadows the outer column object `key` */
+ void *key, *value;
+ if (grn_hash_cursor_get_key_value(ctx, hc, &key, NULL, &value)) {
+ grn_id *rp;
+ rp = key;
+ GRN_BULK_REWIND(&item_freq2);
+ GRN_BULK_REWIND(&item_boost);
+ grn_obj_get_value(ctx, items_freq2, *rp, &item_freq2);
+ grn_obj_get_value(ctx, items_boost, *rp, &item_boost);
+ if (GRN_INT32_VALUE(&item_boost) >= 0) {
+ int32_t score;
+ grn_rset_recinfo *ri;
+ score = 1 +
+ (GRN_INT32_VALUE(&item_freq2) >> 4) +
+ GRN_INT32_VALUE(&item_boost);
+ ri = value;
+ ri->score += score;
+ /* the record is kept; skip the delete below */
+ if (score >= frequency_threshold) { continue; }
+ }
+ /* score < frequency_threshold || item_boost < 0 */
+ grn_hash_cursor_delete(ctx, hc, NULL);
+ }
+ }
+ grn_hash_cursor_close(ctx, hc);
+ }
+ }
+ GRN_QUERY_LOG(ctx, GRN_QUERY_LOG_SIZE,
+ ":", "filter(%d)", grn_table_size(ctx, res));
+ {
+ /* exec _score -= edit_distance(_key, "query string") for all records */
+ grn_obj *var;
+ grn_obj *expr;
+
+ GRN_EXPR_CREATE_FOR_QUERY(ctx, res, expr, var);
+ if (expr) {
+ grn_table_cursor *tc;
+ grn_obj *score = grn_obj_column(ctx, res,
+ GRN_COLUMN_NAME_SCORE,
+ GRN_COLUMN_NAME_SCORE_LEN);
+ /* NOTE: shadows the outer `key`; this one is the _key accessor of res */
+ grn_obj *key = grn_obj_column(ctx, res,
+ GRN_COLUMN_NAME_KEY,
+ GRN_COLUMN_NAME_KEY_LEN);
+ grn_expr_append_obj(ctx, expr,
+ score,
+ GRN_OP_GET_VALUE, 1);
+ grn_expr_append_obj(ctx, expr,
+ grn_ctx_get(ctx, CONST_STR_LEN("edit_distance")),
+ GRN_OP_PUSH, 1);
+ grn_expr_append_obj(ctx, expr,
+ key,
+ GRN_OP_GET_VALUE, 1);
+ grn_expr_append_const(ctx, expr, query, GRN_OP_PUSH, 1);
+ grn_expr_append_op(ctx, expr, GRN_OP_CALL, 2);
+ grn_expr_append_op(ctx, expr, GRN_OP_MINUS_ASSIGN, 2);
+
+ if ((tc = grn_table_cursor_open(ctx, res, NULL, 0, NULL, 0, 0, -1, 0))) {
+ grn_id id;
+ grn_obj score_value;
+ GRN_INT32_INIT(&score_value, 0);
+ /* step 3: apply the expression per record, then re-check the score */
+ while ((id = grn_table_cursor_next(ctx, tc)) != GRN_ID_NIL) {
+ GRN_RECORD_SET(ctx, var, id);
+ grn_expr_exec(ctx, expr, 0);
+ GRN_BULK_REWIND(&score_value);
+ grn_obj_get_value(ctx, score, id, &score_value);
+ if (GRN_INT32_VALUE(&score_value) < frequency_threshold) {
+ grn_table_cursor_delete(ctx, tc);
+ }
+ }
+ grn_obj_unlink(ctx, &score_value);
+ grn_table_cursor_close(ctx, tc);
+ }
+ grn_obj_unlink(ctx, score);
+ grn_obj_unlink(ctx, key);
+ grn_obj_unlink(ctx, expr);
+ } else {
+ ERR(GRN_UNKNOWN_ERROR,
+ "error on building expr. for calicurating edit distance");
+ }
+ }
+ }
+ /* releases the outer `key` (the _key accessor of items) */
+ grn_obj_unlink(ctx, key);
+ }
+ }
+ output(ctx, items, res, tid, sortby, output_columns, offset, limit);
+ grn_obj_close(ctx, res);
+ } else {
+ ERR(GRN_UNKNOWN_ERROR, "cannot create temporary table.");
+ }
+ GRN_OBJ_FIN(ctx, &item_boost);
+ GRN_OBJ_FIN(ctx, &item_freq2);
+}
+
+/* Run the "suggest" search for `query` and emit the result via output().
+ *
+ * The only candidate source is cooccurrence_search() with SUGGEST for the
+ * item whose key equals the query.  Raises GRN_UNKNOWN_ERROR when the
+ * temporary result table cannot be created.
+ */
+static void
+suggest(grn_ctx *ctx, grn_obj *items, grn_obj *items_boost,
+        grn_obj *query, grn_obj *sortby,
+        grn_obj *output_columns, int offset, int limit,
+        int frequency_threshold, double conditional_probability_threshold)
+{
+  grn_id query_item_id;
+  grn_obj *candidates;
+
+  candidates = grn_table_create(ctx, NULL, 0, NULL,
+                                GRN_TABLE_HASH_KEY|GRN_OBJ_WITH_SUBREC,
+                                items, NULL);
+  if (!candidates) {
+    ERR(GRN_UNKNOWN_ERROR, "cannot create temporary table.");
+    return;
+  }
+
+  query_item_id = grn_table_get(ctx, items, TEXT_VALUE_LEN(query));
+  cooccurrence_search(ctx, items, items_boost, query_item_id, candidates,
+                      SUGGEST, frequency_threshold,
+                      conditional_probability_threshold);
+  output(ctx, items, candidates, query_item_id, sortby, output_columns,
+         offset, limit);
+  grn_obj_close(ctx, candidates);
+}
+
+/* Translate a "yes" / "no" argument (compared case-insensitively) into a
+ * search mode.  Any other text, including an empty one, selects
+ * GRN_SUGGEST_SEARCH_AUTO.
+ */
+static grn_suggest_search_mode
+parse_search_mode(grn_ctx *ctx, grn_obj *mode_text)
+{
+  int length = GRN_TEXT_LEN(mode_text);
+
+  switch (length) {
+  case 3 :
+    if (strncasecmp("yes", GRN_TEXT_VALUE(mode_text), 3) == 0) {
+      return GRN_SUGGEST_SEARCH_YES;
+    }
+    break;
+  case 2 :
+    if (strncasecmp("no", GRN_TEXT_VALUE(mode_text), 2) == 0) {
+      return GRN_SUGGEST_SEARCH_NO;
+    }
+    break;
+  default :
+    break;
+  }
+
+  return GRN_SUGGEST_SEARCH_AUTO;
+}
+
+/* Implementation of the "suggest" command.
+ *
+ * Command variables by offset (as registered in GRN_PLUGIN_REGISTER):
+ *   0 types, 1 table, 2 column, 3 query, 4 sortby, 5 output_columns,
+ *   6 offset, 7 limit, 8 frequency_threshold,
+ *   9 conditional_probability_threshold, 10 prefix_search,
+ *   11 similar_search.
+ *
+ * Emits a map with one entry per requested type ("complete", "correct",
+ * "suggest", in that order).  Raises GRN_INVALID_ARGUMENT when the table,
+ * its "boost" column, or (for complete) the given column does not exist.
+ * Always returns NULL.
+ */
+static grn_obj *
+command_suggest(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data)
+{
+ grn_obj *items, *col, *items_boost;
+ int types;
+ int offset = 0;
+ int limit = DEFAULT_LIMIT;
+ int frequency_threshold = DEFAULT_FREQUENCY_THRESHOLD;
+ double conditional_probability_threshold =
+ DEFAULT_CONDITIONAL_PROBABILITY_THRESHOLD;
+ grn_suggest_search_mode prefix_search_mode;
+ grn_suggest_search_mode similar_search_mode;
+
+ types = grn_parse_suggest_types(VAR(0));
+ /* numeric arguments keep their defaults when left empty */
+ if (GRN_TEXT_LEN(VAR(6)) > 0) {
+ offset = grn_atoi(GRN_TEXT_VALUE(VAR(6)), GRN_BULK_CURR(VAR(6)), NULL);
+ }
+ if (GRN_TEXT_LEN(VAR(7)) > 0) {
+ limit = grn_atoi(GRN_TEXT_VALUE(VAR(7)), GRN_BULK_CURR(VAR(7)), NULL);
+ }
+ if (GRN_TEXT_LEN(VAR(8)) > 0) {
+ frequency_threshold = grn_atoi(GRN_TEXT_VALUE(VAR(8)), GRN_BULK_CURR(VAR(8)), NULL);
+ }
+ if (GRN_TEXT_LEN(VAR(9)) > 0) {
+ /* strtod() needs a NUL-terminated string, so one is appended to the
+    argument bulk in place */
+ GRN_TEXT_PUTC(ctx, VAR(9), '\0');
+ conditional_probability_threshold = strtod(GRN_TEXT_VALUE(VAR(9)), NULL);
+ }
+
+ prefix_search_mode = parse_search_mode(ctx, VAR(10));
+ similar_search_mode = parse_search_mode(ctx, VAR(11));
+
+ if ((items = grn_ctx_get(ctx, TEXT_VALUE_LEN(VAR(1))))) {
+ if ((items_boost = grn_obj_column(ctx, items, CONST_STR_LEN("boost")))) {
+ GRN_OUTPUT_MAP_OPEN("RESULT_SET", -1);
+ if (types & COMPLETE) {
+ /* only complete needs the column argument */
+ if ((col = grn_obj_column(ctx, items, TEXT_VALUE_LEN(VAR(2))))) {
+ GRN_OUTPUT_CSTR("complete");
+ complete(ctx, items, items_boost, col, VAR(3), VAR(4),
+ VAR(5), offset, limit,
+ frequency_threshold, conditional_probability_threshold,
+ prefix_search_mode);
+ } else {
+ ERR(GRN_INVALID_ARGUMENT, "invalid column.");
+ }
+ }
+ if (types & CORRECT) {
+ GRN_OUTPUT_CSTR("correct");
+ correct(ctx, items, items_boost, VAR(3), VAR(4),
+ VAR(5), offset, limit,
+ frequency_threshold, conditional_probability_threshold,
+ similar_search_mode);
+ }
+ if (types & SUGGEST) {
+ GRN_OUTPUT_CSTR("suggest");
+ suggest(ctx, items, items_boost, VAR(3), VAR(4),
+ VAR(5), offset, limit,
+ frequency_threshold, conditional_probability_threshold);
+ }
+ GRN_OUTPUT_MAP_CLOSE();
+ } else {
+ ERR(GRN_INVALID_ARGUMENT, "nonexistent column: <%.*s.boost>",
+ (int)GRN_TEXT_LEN(VAR(1)), GRN_TEXT_VALUE(VAR(1)));
+ }
+ grn_obj_unlink(ctx, items);
+ } else {
+ ERR(GRN_INVALID_ARGUMENT, "nonexistent table: <%.*s>",
+ (int)GRN_TEXT_LEN(VAR(1)), GRN_TEXT_VALUE(VAR(1)));
+ }
+ return NULL;
+}
+
+/* Copy the record ids and the time value out of the argument objects into
+   the learner's plain fields. */
+static void
+learner_init_values(grn_ctx *ctx, grn_suggest_learner *learner)
+{
+ learner->post_event_id = GRN_RECORD_VALUE(learner->post_event);
+ learner->post_type_id = GRN_RECORD_VALUE(learner->post_type);
+ learner->post_item_id = GRN_RECORD_VALUE(learner->post_item);
+ learner->seq_id = GRN_RECORD_VALUE(learner->seq);
+ learner->post_time_value = GRN_TIME_VALUE(learner->post_time);
+}
+
+/* Store the suggest_preparer() arguments in `learner`, reset the learn
+   distance to 0 and extract the argument values.  The remaining fields
+   are set up later by learner_learn(). */
+static void
+learner_init(grn_ctx *ctx, grn_suggest_learner *learner,
+ grn_obj *post_event, grn_obj *post_type, grn_obj *post_item,
+ grn_obj *seq, grn_obj *post_time, grn_obj *pairs)
+{
+ learner->post_event = post_event;
+ learner->post_type = post_type;
+ learner->post_item = post_item;
+ learner->seq = seq;
+ learner->post_time = post_time;
+ learner->pairs = pairs;
+
+ learner->learn_distance_in_seconds = 0;
+
+ learner_init_values(ctx, learner);
+}
+
+/* Resolve every table and column the learner works with.
+ *
+ *   seqs        <- domain of the `seq` argument; column "events"
+ *   events      <- range of seqs.events; columns "item", "type", "time"
+ *   event_types <- range of events.type
+ *   items       <- domain of the `post_item` argument; columns "freq",
+ *                  "freq2", "last"
+ *   pairs       <- the `pairs` argument; columns "pre", "post", "freq0",
+ *                  "freq1", "freq2"
+ *
+ * Everything resolved here is released by learner_fin_columns().
+ */
+static void
+learner_init_columns(grn_ctx *ctx, grn_suggest_learner *learner)
+{
+  grn_id events_id, event_types_id;
+  grn_obj *seqs, *events, *post_item, *items, *pairs;
+
+  learner->seqs = seqs = grn_ctx_at(ctx, GRN_OBJ_GET_DOMAIN(learner->seq));
+  learner->seqs_events = grn_obj_column(ctx, seqs, CONST_STR_LEN("events"));
+
+  events_id = grn_obj_get_range(ctx, learner->seqs_events);
+  learner->events = events = grn_ctx_at(ctx, events_id);
+  learner->events_item = grn_obj_column(ctx, events, CONST_STR_LEN("item"));
+  learner->events_type = grn_obj_column(ctx, events, CONST_STR_LEN("type"));
+  learner->events_time = grn_obj_column(ctx, events, CONST_STR_LEN("time"));
+
+  /* The event type table is the range of events.type.  The id used to be
+     computed and then ignored, with event_types set to a second handle
+     on the events.time column. */
+  event_types_id = grn_obj_get_range(ctx, learner->events_type);
+  learner->event_types = grn_ctx_at(ctx, event_types_id);
+
+  post_item = learner->post_item;
+  learner->items = items = grn_ctx_at(ctx, GRN_OBJ_GET_DOMAIN(post_item));
+  learner->items_freq = grn_obj_column(ctx, items, CONST_STR_LEN("freq"));
+  learner->items_freq2 = grn_obj_column(ctx, items, CONST_STR_LEN("freq2"));
+  learner->items_last = grn_obj_column(ctx, items, CONST_STR_LEN("last"));
+
+  pairs = learner->pairs;
+  learner->pairs_pre = grn_obj_column(ctx, pairs, CONST_STR_LEN("pre"));
+  learner->pairs_post = grn_obj_column(ctx, pairs, CONST_STR_LEN("post"));
+  learner->pairs_freq0 = grn_obj_column(ctx, pairs, CONST_STR_LEN("freq0"));
+  learner->pairs_freq1 = grn_obj_column(ctx, pairs, CONST_STR_LEN("freq1"));
+  learner->pairs_freq2 = grn_obj_column(ctx, pairs, CONST_STR_LEN("freq2"));
+}
+
+/* Release the tables and columns resolved by learner_init_columns(), in
+   the order they were resolved.  The `pairs` table itself is an argument
+   of the caller and is not released here. */
+static void
+learner_fin_columns(grn_ctx *ctx, grn_suggest_learner *learner)
+{
+  grn_obj *resolved[] = {
+    learner->seqs,
+    learner->seqs_events,
+
+    learner->events,
+    learner->events_item,
+    learner->events_type,
+    learner->events_time,
+
+    learner->event_types,
+
+    learner->items,
+    learner->items_freq,
+    learner->items_freq2,
+    learner->items_last,
+
+    learner->pairs_pre,
+    learner->pairs_post,
+    learner->pairs_freq0,
+    learner->pairs_freq1,
+    learner->pairs_freq2
+  };
+  size_t i;
+
+  for (i = 0; i < sizeof(resolved) / sizeof(resolved[0]); i++) {
+    grn_obj_unlink(ctx, resolved[i]);
+  }
+}
+
+/* Initialize learner->weight, the amount added by learner_increment().
+ *
+ * The weight is 1 unless the configuration table exists, has a "weight"
+ * column and contains a record keyed by the dataset name; in that case
+ * the stored value is used.
+ */
+static void
+learner_init_weight(grn_ctx *ctx, grn_suggest_learner *learner)
+{
+  unsigned int weight = 1;
+
+  if (learner->configuration) {
+    grn_obj *weight_column = grn_obj_column(ctx,
+                                            learner->configuration,
+                                            CONST_STR_LEN("weight"));
+    if (weight_column) {
+      grn_id dataset_id =
+        grn_table_get(ctx, learner->configuration,
+                      TEXT_VALUE_LEN(&(learner->dataset_name)));
+      if (dataset_id != GRN_ID_NIL) {
+        grn_obj stored_weight;
+        GRN_UINT32_INIT(&stored_weight, 0);
+        grn_obj_get_value(ctx, weight_column, dataset_id, &stored_weight);
+        weight = GRN_UINT32_VALUE(&stored_weight);
+        GRN_OBJ_FIN(ctx, &stored_weight);
+      }
+      grn_obj_unlink(ctx, weight_column);
+    }
+  }
+
+  GRN_UINT32_INIT(&(learner->weight), 0);
+  GRN_UINT32_SET(ctx, &(learner->weight), weight);
+}
+
+/* Initialize learner->dataset_name from the name of the events table.
+ *
+ * The first strlen("event_") bytes of the table name are dropped without
+ * checking what they contain.  When the name is not longer than that, the
+ * dataset name stays empty.
+ */
+static void
+learner_init_dataset_name(grn_ctx *ctx, grn_suggest_learner *learner)
+{
+  char table_name[GRN_TABLE_MAX_KEY_SIZE];
+  unsigned int table_name_size;
+  unsigned int prefix_size;
+
+  table_name_size = grn_obj_name(ctx, learner->events,
+                                 table_name, GRN_TABLE_MAX_KEY_SIZE);
+  GRN_TEXT_INIT(&(learner->dataset_name), 0);
+  prefix_size = strlen("event_");
+  if (table_name_size <= prefix_size) {
+    return;
+  }
+  GRN_TEXT_PUT(ctx,
+               &(learner->dataset_name),
+               table_name + prefix_size,
+               table_name_size - prefix_size);
+}
+
+/* Release the text bulk set up by learner_init_dataset_name(). */
+static void
+learner_fin_dataset_name(grn_ctx *ctx, grn_suggest_learner *learner)
+{
+ GRN_OBJ_FIN(ctx, &(learner->dataset_name));
+}
+
+/* Look up the object named "configuration"; learner->configuration is
+   whatever grn_ctx_get() returns and is treated as optional by its users
+   (they check it for NULL). */
+static void
+learner_init_configuration(grn_ctx *ctx, grn_suggest_learner *learner)
+{
+ learner->configuration = grn_ctx_get(ctx, "configuration", -1);
+}
+
+/* Release the configuration table if one was found. */
+static void
+learner_fin_configuration(grn_ctx *ctx, grn_suggest_learner *learner)
+{
+  if (!learner->configuration) {
+    return;
+  }
+  grn_obj_unlink(ctx, learner->configuration);
+}
+
+/* Set up the weight value and an empty record bulk for the sequence's
+   previous events.  Reads learner->events, configuration and dataset_name,
+   so those must already be initialized. */
+static void
+learner_init_buffers(grn_ctx *ctx, grn_suggest_learner *learner)
+{
+ learner_init_weight(ctx, learner);
+ GRN_RECORD_INIT(&(learner->pre_events), 0, grn_obj_id(ctx, learner->events));
+}
+
+/* Release the bulks set up by learner_init_buffers(). */
+static void
+learner_fin_buffers(grn_ctx *ctx, grn_suggest_learner *learner)
+{
+ grn_obj_unlink(ctx, &(learner->weight));
+ grn_obj_unlink(ctx, &(learner->pre_events));
+}
+
+/* Prepare the state needed for pair learning: the pair key prefix, the
+   scratch item record, and the events already stored for the sequence. */
+static void
+learner_init_submit_learn(grn_ctx *ctx, grn_suggest_learner *learner)
+{
+ grn_id items_id;
+
+ /* pair key = (post item id << 32) + other item id */
+ learner->key_prefix = ((uint64_t)learner->post_item_id) << 32;
+
+ items_id = grn_obj_get_range(ctx, learner->events_item);
+ GRN_RECORD_INIT(&(learner->pre_item), 0, items_id);
+
+ /* load seqs.events of this sequence into pre_events */
+ grn_obj_get_value(ctx, learner->seqs_events, learner->seq_id,
+ &(learner->pre_events));
+}
+
+/* Undo learner_init_submit_learn(): release the scratch item record and
+   empty pre_events.  pre_events is only rewound, not released;
+   learner_fin_buffers() releases it. */
+static void
+learner_fin_submit_learn(grn_ctx *ctx, grn_suggest_learner *learner)
+{
+ grn_obj_unlink(ctx, &(learner->pre_item));
+ GRN_BULK_REWIND(&(learner->pre_events));
+}
+
+/* Learning needs a non-zero event id, item id and sequence id.  The event
+   type id is allowed to be 0. */
+static grn_bool
+learner_is_valid_input(grn_ctx *ctx, grn_suggest_learner *learner)
+{
+  if (!learner->post_event_id) {
+    return 0;
+  }
+  if (!learner->post_item_id) {
+    return 0;
+  }
+  return learner->seq_id != 0;
+}
+
+/* Add the learner's weight to `column` of record `record_id`
+   (GRN_OBJ_INCR). */
+static void
+learner_increment(grn_ctx *ctx, grn_suggest_learner *learner,
+ grn_obj *column, grn_id record_id)
+{
+ grn_obj_set_value(ctx, column, record_id, &(learner->weight), GRN_OBJ_INCR);
+}
+
+/* Add the learner's weight to `column` of the posted item. */
+static void
+learner_increment_item_freq(grn_ctx *ctx, grn_suggest_learner *learner,
+ grn_obj *column)
+{
+ learner_increment(ctx, learner, column, learner->post_item_id);
+}
+
+/* Store the event time in the "last" column of the posted item. */
+static void
+learner_set_last_post_time(grn_ctx *ctx, grn_suggest_learner *learner)
+{
+ grn_obj_set_value(ctx, learner->items_last, learner->post_item_id,
+ learner->post_time, GRN_OBJ_SET);
+}
+
+/* Learn (previous item -> posted item) pairs for completion and
+ * correction.  ("correcnt" in the name is a historical misspelling of
+ * "correct"; it is kept because the caller uses this name.)
+ *
+ * The events stored for the sequence are walked from the last element of
+ * pre_events back to the first.  For every previous event:
+ *   - if it is at least MIN_LEARN_DISTANCE older than the posted event,
+ *     the distance (in seconds) is recorded and the walk stops;
+ *   - otherwise the pair record keyed by key_prefix + previous item id is
+ *     looked up or created (pre/post are filled in on creation);
+ *   - if the previous event has a non-zero type, pairs.freq1 is
+ *     incremented and the walk stops; otherwise pairs.freq0 is
+ *     incremented and the walk continues.
+ */
+static void
+learner_learn_for_complete_and_correcnt(grn_ctx *ctx,
+ grn_suggest_learner *learner)
+{
+ grn_obj *pre_item, *post_item, *pre_events;
+ grn_obj pre_type, pre_time;
+ grn_id *ep, *es;
+ uint64_t key;
+ int64_t post_time_value;
+
+ pre_item = &(learner->pre_item);
+ post_item = learner->post_item;
+ pre_events = &(learner->pre_events);
+ post_time_value = learner->post_time_value;
+ GRN_RECORD_INIT(&pre_type, 0, grn_obj_get_range(ctx, learner->events_type));
+ GRN_TIME_INIT(&pre_time, 0);
+ /* ep starts one past the last stored event id, es at the first one */
+ ep = (grn_id *)GRN_BULK_CURR(pre_events);
+ es = (grn_id *)GRN_BULK_HEAD(pre_events);
+ /* compare first, then step back: inside the body *ep is a valid element */
+ while (es < ep--) {
+ grn_id pair_id;
+ int added;
+ int64_t learn_distance;
+
+ GRN_BULK_REWIND(&pre_type);
+ GRN_BULK_REWIND(&pre_time);
+ GRN_BULK_REWIND(pre_item);
+ grn_obj_get_value(ctx, learner->events_type, *ep, &pre_type);
+ grn_obj_get_value(ctx, learner->events_time, *ep, &pre_time);
+ grn_obj_get_value(ctx, learner->events_item, *ep, pre_item);
+ learn_distance = post_time_value - GRN_TIME_VALUE(&pre_time);
+ if (learn_distance >= MIN_LEARN_DISTANCE) {
+ learner->learn_distance_in_seconds =
+ (int)(learn_distance / GRN_TIME_USEC_PER_SEC);
+ break;
+ }
+ key = learner->key_prefix + GRN_RECORD_VALUE(pre_item);
+ pair_id = grn_table_add(ctx, learner->pairs, &key, sizeof(uint64_t),
+ &added);
+ /* a newly created pair gets its two item references */
+ if (added) {
+ grn_obj_set_value(ctx, learner->pairs_pre, pair_id, pre_item,
+ GRN_OBJ_SET);
+ grn_obj_set_value(ctx, learner->pairs_post, pair_id, post_item,
+ GRN_OBJ_SET);
+ }
+ if (GRN_RECORD_VALUE(&pre_type)) {
+ learner_increment(ctx, learner, learner->pairs_freq1, pair_id);
+ break;
+ } else {
+ learner_increment(ctx, learner, learner->pairs_freq0, pair_id);
+ }
+ }
+ GRN_OBJ_FIN(ctx, &pre_type);
+ GRN_OBJ_FIN(ctx, &pre_time);
+}
+
+/* Learn (token -> posted item) pairs for suggestion.
+ *
+ * The key of the posted item is tokenized against the items table with
+ * GRN_TOKEN_ADD.  For each token id returned, until the tokenizer yields 0
+ * or the posted item's own id, the pair record keyed by key_prefix +
+ * token id is looked up or created, and pairs.freq2 is incremented once
+ * per distinct token id (tracked in a temporary hash).  If that hash
+ * cannot be created, pairs are still created but freq2 is not
+ * incremented.
+ */
+static void
+learner_learn_for_suggest(grn_ctx *ctx, grn_suggest_learner *learner)
+{
+ char keybuf[GRN_TABLE_MAX_KEY_SIZE];
+ int keylen = grn_table_get_key(ctx, learner->items, learner->post_item_id,
+ keybuf, GRN_TABLE_MAX_KEY_SIZE);
+ unsigned int token_flags = 0;
+ grn_token *token = grn_token_open(ctx, learner->items, keybuf, keylen,
+ GRN_TOKEN_ADD, token_flags);
+ if (token) {
+ grn_id tid;
+ grn_obj *pre_item = &(learner->pre_item);
+ grn_obj *post_item = learner->post_item;
+ /* created lazily on the first token */
+ grn_hash *token_ids = NULL;
+ while ((tid = grn_token_next(ctx, token)) && tid != learner->post_item_id) {
+ uint64_t key;
+ int added;
+ grn_id pair_id;
+ key = learner->key_prefix + tid;
+ pair_id = grn_table_add(ctx, learner->pairs, &key, sizeof(uint64_t),
+ &added);
+ if (added) {
+ GRN_RECORD_SET(ctx, pre_item, tid);
+ grn_obj_set_value(ctx, learner->pairs_pre, pair_id,
+ pre_item, GRN_OBJ_SET);
+ grn_obj_set_value(ctx, learner->pairs_post, pair_id,
+ post_item, GRN_OBJ_SET);
+ }
+ if (!token_ids) {
+ token_ids = grn_hash_create(ctx, NULL, sizeof(grn_id), 0,
+ GRN_OBJ_TABLE_HASH_KEY|GRN_HASH_TINY);
+ }
+ if (token_ids) {
+ int token_added;
+ grn_hash_add(ctx, token_ids, &tid, sizeof(grn_id), NULL, &token_added);
+ /* count each distinct token only once per learned item */
+ if (token_added) {
+ learner_increment(ctx, learner, learner->pairs_freq2, pair_id);
+ }
+ }
+ }
+ if (token_ids) {
+ grn_hash_close(ctx, token_ids);
+ }
+ grn_token_close(ctx, token);
+ }
+}
+
+/* Append the posted event to the sequence's "events" column.  pre_events
+   is reused as the value buffer and is overwritten with the posted event
+   id. */
+static void
+learner_append_post_event(grn_ctx *ctx, grn_suggest_learner *learner)
+{
+ GRN_RECORD_SET(ctx, &(learner->pre_events), learner->post_event_id);
+ grn_obj_set_value(ctx, learner->seqs_events, learner->seq_id,
+ &(learner->pre_events), GRN_OBJ_APPEND);
+}
+
+/* Learn from one posted event.
+ *
+ * Does nothing for invalid input (see learner_is_valid_input()).
+ * Otherwise: increments items.freq and records the post time for every
+ * event; for events with a non-zero type it also increments items.freq2
+ * and learns pairs for complete/correct and for suggest; finally the
+ * event is appended to its sequence.  Everything set up here is torn
+ * down in reverse order before returning.
+ */
+static void
+learner_learn(grn_ctx *ctx, grn_suggest_learner *learner)
+{
+  if (!learner_is_valid_input(ctx, learner)) {
+    return;
+  }
+
+  /* set up */
+  learner_init_columns(ctx, learner);
+  learner_init_dataset_name(ctx, learner);
+  learner_init_configuration(ctx, learner);
+  learner_init_buffers(ctx, learner);
+
+  /* learned for every event */
+  learner_increment_item_freq(ctx, learner, learner->items_freq);
+  learner_set_last_post_time(ctx, learner);
+
+  /* learned only for events with a non-zero type */
+  if (learner->post_type_id) {
+    learner_init_submit_learn(ctx, learner);
+    learner_increment_item_freq(ctx, learner, learner->items_freq2);
+    learner_learn_for_complete_and_correcnt(ctx, learner);
+    learner_learn_for_suggest(ctx, learner);
+    learner_fin_submit_learn(ctx, learner);
+  }
+
+  learner_append_post_event(ctx, learner);
+
+  /* tear down, in reverse order of set up */
+  learner_fin_buffers(ctx, learner);
+  learner_fin_configuration(ctx, learner);
+  learner_fin_dataset_name(ctx, learner);
+  learner_fin_columns(ctx, learner);
+}
+
+/* Implementation of the suggest_preparer() function.
+ *
+ * Expects exactly six arguments: post_event, post_type, post_item, seq,
+ * post_time and pairs.  With any other argument count no learning takes
+ * place.  Returns a UInt32 holding the learn distance in seconds (0 when
+ * none was recorded), or NULL when the result object cannot be allocated.
+ */
+static grn_obj *
+func_suggest_preparer(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data)
+{
+  grn_obj *result;
+  int distance_in_seconds = 0;
+
+  if (nargs == 6) {
+    grn_suggest_learner learner;
+    learner_init(ctx, &learner,
+                 args[0],  /* post_event */
+                 args[1],  /* post_type */
+                 args[2],  /* post_item */
+                 args[3],  /* seq */
+                 args[4],  /* post_time */
+                 args[5]); /* pairs */
+    learner_learn(ctx, &learner);
+    distance_in_seconds = learner.learn_distance_in_seconds;
+  }
+
+  result = GRN_PROC_ALLOC(GRN_DB_UINT32, 0);
+  if (result) {
+    GRN_UINT32_SET(ctx, result, distance_in_seconds);
+  }
+  return result;
+}
+
+/* Plugin initialization hook; this plugin needs no setup. */
+grn_rc
+GRN_PLUGIN_INIT(grn_ctx *ctx)
+{
+ return GRN_SUCCESS;
+}
+
+/* Register the "suggest" command and the "suggest_preparer" function.
+ *
+ * The position of each name in the table below is the offset that
+ * command_suggest() passes to VAR().  Returns ctx->rc after both
+ * registrations.
+ */
+grn_rc
+GRN_PLUGIN_REGISTER(grn_ctx *ctx)
+{
+  static const char * const var_names[] = {
+    "types",
+    "table",
+    "column",
+    "query",
+    "sortby",
+    "output_columns",
+    "offset",
+    "limit",
+    "frequency_threshold",
+    "conditional_probability_threshold",
+    "prefix_search",
+    "similar_search"
+  };
+  grn_expr_var vars[sizeof(var_names) / sizeof(var_names[0])];
+  unsigned int n_vars = sizeof(var_names) / sizeof(var_names[0]);
+  unsigned int i;
+
+  for (i = 0; i < n_vars; i++) {
+    grn_plugin_expr_var_init(ctx, &vars[i], var_names[i], -1);
+  }
+  grn_plugin_command_create(ctx, "suggest", -1, command_suggest, n_vars, vars);
+
+  grn_proc_create(ctx, CONST_STR_LEN("suggest_preparer"), GRN_PROC_FUNCTION,
+                  func_suggest_preparer, NULL, NULL, 0, NULL);
+  return ctx->rc;
+}
+
+/* Plugin finalization hook; this plugin has nothing to tear down. */
+grn_rc
+GRN_PLUGIN_FIN(grn_ctx *ctx)
+{
+ return GRN_SUCCESS;
+}
diff --git a/storage/mroonga/vendor/groonga/plugins/table/CMakeLists.txt b/storage/mroonga/vendor/groonga/plugins/table/CMakeLists.txt
new file mode 100644
index 00000000000..cba4697f042
--- /dev/null
+++ b/storage/mroonga/vendor/groonga/plugins/table/CMakeLists.txt
@@ -0,0 +1,29 @@
+# Copyright(C) 2012-2013 Brazil
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License version 2.1 as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+
+# Header search path: the lib directory two levels up from this one, plus
+# whatever ${MRUBY_INCLUDE_DIRS} expands to.
+include_directories(
+ ${CMAKE_CURRENT_SOURCE_DIR}/../../lib
+ ${MRUBY_INCLUDE_DIRS})
+
+# Build the "table" plugin as a loadable MODULE from the files listed in
+# sources.am. read_file_list is not defined in this file (project helper).
+read_file_list(${CMAKE_CURRENT_SOURCE_DIR}/sources.am TABLE_SOURCES)
+add_library(table MODULE ${TABLE_SOURCES})
+set_source_files_properties(${TABLE_SOURCES}
+ PROPERTIES
+ COMPILE_FLAGS "${GRN_C_COMPILE_FLAGS}")
+# Empty PREFIX: the module file name gets no "lib"-style prefix.
+set_target_properties(table PROPERTIES PREFIX "")
+target_link_libraries(table libgroonga)
+# Install the plugin only when MRN_GROONGA_BUNDLED is not set.
+if(NOT MRN_GROONGA_BUNDLED)
+ install(TARGETS table DESTINATION "${GRN_RELATIVE_PLUGINS_DIR}/table")
+endif()
diff --git a/storage/mroonga/vendor/groonga/plugins/table/Makefile.am b/storage/mroonga/vendor/groonga/plugins/table/Makefile.am
new file mode 100644
index 00000000000..4b49b0a3224
--- /dev/null
+++ b/storage/mroonga/vendor/groonga/plugins/table/Makefile.am
@@ -0,0 +1,24 @@
+# Also ship the CMake build description in the distribution.
+EXTRA_DIST = \
+ CMakeLists.txt
+
+# Compiler flags shared by every target in this directory.
+AM_CFLAGS = \
+ $(MESSAGE_PACK_CFLAGS) \
+ $(MRUBY_CFLAGS)
+
+# Include paths: build tree, public headers, and the lib directory.
+AM_CPPFLAGS = \
+ -I$(top_builddir) \
+ -I$(top_srcdir)/include \
+ -I$(top_srcdir)/lib
+
+# libtool flags for a dlopen-able module without a version suffix.
+AM_LDFLAGS = \
+ -avoid-version \
+ -module \
+ -no-undefined
+
+# Libraries the plugin is linked against.
+LIBS = \
+ $(top_builddir)/lib/libgroonga.la \
+ $(MESSAGE_PACK_LIBS)
+
+# The plugin itself; table_pluginsdir is presumably defined elsewhere in the
+# build system (not visible here).
+table_plugins_LTLIBRARIES = table.la
+
+# Provides table_la_SOURCES.
+include sources.am
diff --git a/storage/mroonga/vendor/groonga/plugins/table/sources.am b/storage/mroonga/vendor/groonga/plugins/table/sources.am
new file mode 100644
index 00000000000..943e79b1f5c
--- /dev/null
+++ b/storage/mroonga/vendor/groonga/plugins/table/sources.am
@@ -0,0 +1,2 @@
+table_la_SOURCES = \
+ table.c
diff --git a/storage/mroonga/vendor/groonga/plugins/table/table.c b/storage/mroonga/vendor/groonga/plugins/table/table.c
new file mode 100644
index 00000000000..a4fcd17bf90
--- /dev/null
+++ b/storage/mroonga/vendor/groonga/plugins/table/table.c
@@ -0,0 +1,747 @@
+/* -*- c-basic-offset: 2; indent-tabs-mode: nil -*- */
+/* Copyright(C) 2012 Brazil
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License version 2.1 as published by the Free Software Foundation.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+*/
+
+#include <string.h>
+
+#include "ctx.h"
+#include "db.h"
+#include "output.h"
+#include "util.h"
+#include <groonga/plugin.h>
+
+/* Shorthand for fetching a command argument by its positional offset. */
+#define VAR GRN_PROC_GET_VAR_BY_OFFSET
+/*
+ * Expands to two comma-separated expressions (value pointer, length) so it
+ * can fill a (text, length) argument pair; note that x is evaluated twice.
+ */
+#define TEXT_VALUE_LEN(x) GRN_TEXT_VALUE(x), GRN_TEXT_LEN(x)
+
+/*
+ * Resolves a table from text that is either a numeric object ID or a name.
+ *
+ * When the whole text parses as an unsigned integer the object is fetched
+ * by ID, otherwise by name. If the result is not a table an
+ * GRN_INVALID_ARGUMENT error is raised, any object that was found is
+ * unlinked, and NULL is returned.
+ */
+static grn_obj *
+grn_ctx_get_table_by_name_or_id(grn_ctx *ctx,
+                                const char *name, unsigned int name_len)
+{
+  const char *name_end = name + name_len;
+  const char *parsed_end = NULL;
+  grn_id numeric_id = grn_atoui(name, name_end, &parsed_end);
+  grn_obj *found;
+
+  /* Only treat the text as an ID when every character was consumed. */
+  found = (parsed_end == name_end)
+    ? grn_ctx_at(ctx, numeric_id)
+    : grn_ctx_get(ctx, name, name_len);
+  if (GRN_OBJ_TABLEP(found)) {
+    return found;
+  }
+
+  ERR(GRN_INVALID_ARGUMENT, "invalid table name: <%.*s>", name_len, name);
+  if (found) {
+    grn_obj_unlink(ctx, found);
+  }
+  return NULL;
+}
+
+/*
+ * Writes a reference to `table` to the command output: the numeric ID for
+ * objects flagged GRN_OBJ_TMP_OBJECT, the name otherwise, and 0 when
+ * `table` is NULL.
+ */
+static void
+grn_output_table_name_or_id(grn_ctx *ctx, grn_obj *table)
+{
+  if (!table) {
+    GRN_OUTPUT_INT64(0);
+    return;
+  }
+
+  if (((grn_db_obj *)table)->id & GRN_OBJ_TMP_OBJECT) {
+    GRN_OUTPUT_INT64(((grn_db_obj *)table)->id);
+  } else {
+    char table_name[GRN_TABLE_MAX_KEY_SIZE];
+    int table_name_size;
+    table_name_size = grn_obj_name(ctx, table,
+                                   table_name, GRN_TABLE_MAX_KEY_SIZE);
+    GRN_OUTPUT_STR(table_name, table_name_size);
+  }
+}
+
+/*
+ * Interprets a text argument as a boolean: exactly "yes" means GRN_TRUE,
+ * anything else (including empty text) means GRN_FALSE.
+ */
+static grn_bool
+parse_bool_value(grn_ctx *ctx, grn_obj *text)
+{
+  if (GRN_TEXT_LEN(text) != 3) {
+    return GRN_FALSE;
+  }
+  return (memcmp("yes", GRN_TEXT_VALUE(text), 3) == 0) ? GRN_TRUE : GRN_FALSE;
+}
+
+/*
+ * Maps a set-operation keyword to its operator:
+ *   "and"               -> GRN_OP_AND
+ *   "but" / "and_not"   -> GRN_OP_AND_NOT
+ *   "adjust"            -> GRN_OP_ADJUST
+ * Every other text, including empty text, yields GRN_OP_OR.
+ */
+static grn_operator
+parse_set_operator_value(grn_ctx *ctx, grn_obj *text)
+{
+  const char *keyword = GRN_TEXT_VALUE(text);
+
+  switch (GRN_TEXT_LEN(text)) {
+  case 3 :
+    if (memcmp("and", keyword, 3) == 0) {
+      return GRN_OP_AND;
+    }
+    if (memcmp("but", keyword, 3) == 0) {
+      return GRN_OP_AND_NOT;
+    }
+    break;
+  case 6 :
+    if (memcmp("adjust", keyword, 6) == 0) {
+      return GRN_OP_ADJUST;
+    }
+    break;
+  case 7 :
+    if (memcmp("and_not", keyword, 7) == 0) {
+      return GRN_OP_AND_NOT;
+    }
+    break;
+  default :
+    break;
+  }
+  return GRN_OP_OR;
+}
+
+/*
+ * "match" command.
+ *
+ * Arguments by offset: 0 table, 1 columns (script-syntax match columns,
+ * optional), 2 query, 3 result_set (optional existing table), 4
+ * set_operation, 5 allow_column_expression, 6 allow_pragma.
+ *
+ * Parses the query in query syntax, selects matching records of the table
+ * into the result set (a new hash table with sub-records when none is
+ * named) and outputs the name or ID of that result set, or 0 on failure.
+ */
+static grn_obj *
+command_match(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data)
+{
+  grn_obj *result_set = NULL;
+  grn_obj *match_columns = NULL;
+  grn_obj *record_var, *condition;
+  grn_expr_flags parse_flags = GRN_EXPR_SYNTAX_QUERY;
+  grn_obj *table = grn_ctx_get_table_by_name_or_id(ctx, TEXT_VALUE_LEN(VAR(0)));
+
+  if (!table) {
+    goto output;
+  }
+  GRN_EXPR_CREATE_FOR_QUERY(ctx, table, condition, record_var);
+  if (!condition) {
+    goto output;
+  }
+
+  /* Optional match columns are themselves an expression in script syntax. */
+  if (GRN_TEXT_LEN(VAR(1))) {
+    GRN_EXPR_CREATE_FOR_QUERY(ctx, table, match_columns, record_var);
+    if (match_columns) {
+      grn_expr_parse(ctx, match_columns, TEXT_VALUE_LEN(VAR(1)),
+                     NULL, GRN_OP_MATCH, GRN_OP_AND,
+                     GRN_EXPR_SYNTAX_SCRIPT);
+    }
+  }
+
+  if (parse_bool_value(ctx, VAR(5))) {
+    parse_flags |= GRN_EXPR_ALLOW_COLUMN;
+  }
+  if (parse_bool_value(ctx, VAR(6))) {
+    parse_flags |= GRN_EXPR_ALLOW_PRAGMA;
+  }
+  grn_expr_parse(ctx, condition, TEXT_VALUE_LEN(VAR(2)),
+                 match_columns, GRN_OP_MATCH, GRN_OP_AND, parse_flags);
+
+  /* Reuse the named result set, or create an anonymous one. */
+  if (GRN_TEXT_LEN(VAR(3))) {
+    result_set = grn_ctx_get_table_by_name_or_id(ctx, TEXT_VALUE_LEN(VAR(3)));
+  } else {
+    result_set = grn_table_create(ctx, NULL, 0, NULL,
+                                  GRN_TABLE_HASH_KEY|
+                                  GRN_OBJ_WITH_SUBREC,
+                                  table, NULL);
+  }
+  if (result_set) {
+    grn_table_select(ctx, table, condition, result_set,
+                     parse_set_operator_value(ctx, VAR(4)));
+  }
+  grn_obj_unlink(ctx, match_columns);
+  grn_obj_unlink(ctx, condition);
+
+output :
+  grn_output_table_name_or_id(ctx, result_set);
+  return NULL;
+}
+
+/*
+ * "filter_by_script" command.
+ *
+ * Arguments by offset: 0 table, 1 expression (script syntax), 2 result_set
+ * (optional existing table), 3 set_operation, 4 allow_update.
+ *
+ * Selects the records of the table that satisfy the expression into the
+ * result set (a new hash table with sub-records when none is named) and
+ * outputs the name or ID of that result set, or 0 on failure.
+ */
+static grn_obj *
+command_filter_by_script(grn_ctx *ctx, int nargs,
+                         grn_obj **args, grn_user_data *user_data)
+{
+  grn_obj *result_set = NULL;
+  grn_obj *record_var, *condition;
+  grn_expr_flags parse_flags = GRN_EXPR_SYNTAX_SCRIPT;
+  grn_obj *table = grn_ctx_get_table_by_name_or_id(ctx, TEXT_VALUE_LEN(VAR(0)));
+
+  if (!table) {
+    goto output;
+  }
+  GRN_EXPR_CREATE_FOR_QUERY(ctx, table, condition, record_var);
+  if (!condition) {
+    goto output;
+  }
+
+  if (parse_bool_value(ctx, VAR(4))) {
+    parse_flags |= GRN_EXPR_ALLOW_UPDATE;
+  }
+  grn_expr_parse(ctx, condition, TEXT_VALUE_LEN(VAR(1)),
+                 NULL, GRN_OP_MATCH, GRN_OP_AND, parse_flags);
+
+  /* Reuse the named result set, or create an anonymous one. */
+  if (GRN_TEXT_LEN(VAR(2))) {
+    result_set = grn_ctx_get_table_by_name_or_id(ctx, TEXT_VALUE_LEN(VAR(2)));
+  } else {
+    result_set = grn_table_create(ctx, NULL, 0, NULL,
+                                  GRN_TABLE_HASH_KEY|
+                                  GRN_OBJ_WITH_SUBREC,
+                                  table, NULL);
+  }
+  if (result_set) {
+    grn_table_select(ctx, table, condition, result_set,
+                     parse_set_operator_value(ctx, VAR(3)));
+  }
+  grn_obj_unlink(ctx, condition);
+
+output :
+  grn_output_table_name_or_id(ctx, result_set);
+  return NULL;
+}
+
+/*
+ * "filter" command.
+ *
+ * Arguments by offset: 0 table, 1 column, 2 operator, 3 value,
+ * 4 result_set (optional existing table), 5 set_operation.
+ *
+ * Filters the column against the value with the given operator and stores
+ * the hits in the result set (a new hash table with sub-records when none
+ * is named). Only the "<" operator is recognized; anything else raises
+ * GRN_INVALID_ARGUMENT. Outputs the name or ID of the result set, or 0 on
+ * failure.
+ */
+static grn_obj *
+command_filter(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data)
+{
+  grn_operator operator = GRN_OP_NOP;
+  grn_obj *table, *column = NULL, *result_set = NULL;
+  if (!(table = grn_ctx_get_table_by_name_or_id(ctx, TEXT_VALUE_LEN(VAR(0))))) {
+    goto exit;
+  }
+  if (!(column = grn_obj_column(ctx, table, TEXT_VALUE_LEN(VAR(1))))) {
+    ERR(GRN_INVALID_ARGUMENT, "invalid column name: <%.*s>",
+        (int)GRN_TEXT_LEN(VAR(1)), GRN_TEXT_VALUE(VAR(1)));
+    goto exit;
+  }
+  if (GRN_TEXT_LEN(VAR(2)) == 0) {
+    ERR(GRN_INVALID_ARGUMENT, "missing mandatory argument: operator");
+    goto exit;
+  } else {
+    uint32_t operator_len = GRN_TEXT_LEN(VAR(2));
+    const char *operator_text = GRN_TEXT_VALUE(VAR(2));
+    switch (operator_text[0]) {
+    case '<' :
+      if (operator_len == 1) {
+        operator = GRN_OP_LESS;
+      }
+      break;
+    default :
+      break;
+    }
+    if (operator == GRN_OP_NOP) {
+      /* "%.*s" takes an int precision, so cast as the other messages do. */
+      ERR(GRN_INVALID_ARGUMENT, "invalid operator: <%.*s>",
+          (int)operator_len, operator_text);
+      goto exit;
+    }
+  }
+  if (GRN_TEXT_LEN(VAR(4))) {
+    result_set = grn_ctx_get_table_by_name_or_id(ctx, TEXT_VALUE_LEN(VAR(4)));
+  } else {
+    result_set = grn_table_create(ctx, NULL, 0, NULL,
+                                  GRN_TABLE_HASH_KEY|
+                                  GRN_OBJ_WITH_SUBREC,
+                                  table, NULL);
+  }
+  if (result_set) {
+    grn_column_filter(ctx, column, operator, VAR(3), result_set,
+                      parse_set_operator_value(ctx, VAR(5)));
+  }
+exit :
+  /* Release the column object on every path once it has been obtained. */
+  if (column) {
+    grn_obj_unlink(ctx, column);
+  }
+  grn_output_table_name_or_id(ctx, result_set);
+  return NULL;
+}
+
+/*
+ * "group" command.
+ *
+ * Arguments by offset: 0 table, 1 key, 2 result_set (optional existing
+ * table), 3 range_gap (optional).
+ *
+ * Groups the records of the table by the first parsed key. With a
+ * range_gap the grouping goes through grn_table_group_with_range_gap(),
+ * otherwise through grn_table_group() with a count calculation. Outputs
+ * the name or ID of the result set, or 0 on failure.
+ */
+static grn_obj *
+command_group(grn_ctx *ctx, int nargs, grn_obj **args,
+              grn_user_data *user_data)
+{
+  const char *table_name = GRN_TEXT_VALUE(VAR(0));
+  unsigned int table_name_len = GRN_TEXT_LEN(VAR(0));
+  const char *key_spec = GRN_TEXT_VALUE(VAR(1));
+  unsigned int key_spec_len = GRN_TEXT_LEN(VAR(1));
+  const char *result_name = GRN_TEXT_VALUE(VAR(2));
+  unsigned int result_name_len = GRN_TEXT_LEN(VAR(2));
+  grn_obj *source = grn_ctx_get_table_by_name_or_id(ctx,
+                                                    table_name,
+                                                    table_name_len);
+  grn_obj *grouped = NULL;
+  grn_table_sort_key *group_keys;
+  uint32_t n_group_keys;
+
+  if (!source) {
+    goto output;
+  }
+  group_keys = grn_table_sort_key_from_str(ctx, key_spec, key_spec_len,
+                                           source, &n_group_keys);
+  if (!group_keys) {
+    goto output;
+  }
+
+  /* Reuse the named result set, or create one suited to the first key. */
+  if (result_name_len) {
+    grouped = grn_ctx_get_table_by_name_or_id(ctx,
+                                              result_name, result_name_len);
+  } else {
+    grouped = grn_table_create_for_group(ctx, NULL, 0, NULL,
+                                         group_keys[0].key, source, 0);
+  }
+  if (grouped) {
+    if (GRN_TEXT_LEN(VAR(3))) {
+      uint32_t range_gap = grn_atoui(GRN_TEXT_VALUE(VAR(3)),
+                                     GRN_BULK_CURR(VAR(3)), NULL);
+      grn_table_group_with_range_gap(ctx, source, group_keys, grouped,
+                                     range_gap);
+    } else {
+      grn_table_group_result group_result = {
+        grouped, 0, 0, 1,
+        GRN_TABLE_GROUP_CALC_COUNT, 0
+      };
+      grn_table_group(ctx, source, group_keys, 1, &group_result, 1);
+    }
+  }
+  grn_table_sort_key_close(ctx, group_keys, n_group_keys);
+
+output :
+  grn_output_table_name_or_id(ctx, grouped);
+  return NULL;
+}
+
+/* "limit" used by the sort and output commands when none is supplied. */
+#define DEFAULT_LIMIT 10
+
+/*
+ * "sort" command.
+ *
+ * Arguments by offset: 0 table, 1 keys, 2 offset (default 0),
+ * 3 limit (default DEFAULT_LIMIT).
+ *
+ * Sorts the table by the given keys into a new anonymous no-key table and
+ * outputs the name or ID of that table, or 0 when the table or keys are
+ * invalid or the sorted table cannot be created.
+ */
+static grn_obj *
+command_sort(grn_ctx *ctx, int nargs, grn_obj **args,
+             grn_user_data *user_data)
+{
+  const char *table = GRN_TEXT_VALUE(VAR(0));
+  unsigned int table_len = GRN_TEXT_LEN(VAR(0));
+  const char *keys = GRN_TEXT_VALUE(VAR(1));
+  unsigned int keys_len = GRN_TEXT_LEN(VAR(1));
+  int offset = GRN_TEXT_LEN(VAR(2))
+    ? grn_atoi(GRN_TEXT_VALUE(VAR(2)), GRN_BULK_CURR(VAR(2)), NULL)
+    : 0;
+  int limit = GRN_TEXT_LEN(VAR(3))
+    ? grn_atoi(GRN_TEXT_VALUE(VAR(3)), GRN_BULK_CURR(VAR(3)), NULL)
+    : DEFAULT_LIMIT;
+  grn_obj *table_ = grn_ctx_get_table_by_name_or_id(ctx, table, table_len);
+  grn_obj *sorted = NULL;
+  if (table_) {
+    uint32_t nkeys;
+    grn_table_sort_key *keys_;
+    if (keys_len &&
+        (keys_ = grn_table_sort_key_from_str(ctx, keys, keys_len,
+                                             table_, &nkeys))) {
+      if ((sorted = grn_table_create(ctx, NULL, 0, NULL,
+                                     GRN_OBJ_TABLE_NO_KEY, NULL, table_))) {
+        int table_size = (int)grn_table_size(ctx, table_);
+        grn_normalize_offset_and_limit(ctx, table_size, &offset, &limit);
+        grn_table_sort(ctx, table_, offset, limit, sorted, keys_, nkeys);
+      }
+      /* Close the parsed keys even when the sorted table was not created. */
+      grn_table_sort_key_close(ctx, keys_, nkeys);
+    }
+  }
+  grn_output_table_name_or_id(ctx, sorted);
+  return NULL;
+}
+
+/*
+ * "output" command.
+ *
+ * Arguments by offset: 0 table, 1 columns, 2 offset (default 0),
+ * 3 limit (default DEFAULT_LIMIT).
+ *
+ * Outputs the requested columns of the table with column names included.
+ * Nothing is output when the table cannot be resolved.
+ */
+static grn_obj *
+command_output(grn_ctx *ctx, int nargs, grn_obj **args,
+               grn_user_data *user_data)
+{
+  const char *table_name = GRN_TEXT_VALUE(VAR(0));
+  unsigned int table_name_len = GRN_TEXT_LEN(VAR(0));
+  const char *column_list = GRN_TEXT_VALUE(VAR(1));
+  unsigned int column_list_len = GRN_TEXT_LEN(VAR(1));
+  int offset = 0;
+  int limit = DEFAULT_LIMIT;
+  grn_obj *target;
+  grn_obj_format format;
+  int n_records;
+
+  if (GRN_TEXT_LEN(VAR(2))) {
+    offset = grn_atoi(GRN_TEXT_VALUE(VAR(2)), GRN_BULK_CURR(VAR(2)), NULL);
+  }
+  if (GRN_TEXT_LEN(VAR(3))) {
+    limit = grn_atoi(GRN_TEXT_VALUE(VAR(3)), GRN_BULK_CURR(VAR(3)), NULL);
+  }
+
+  target = grn_ctx_get_table_by_name_or_id(ctx, table_name, table_name_len);
+  if (!target) {
+    return NULL;
+  }
+
+  n_records = (int)grn_table_size(ctx, target);
+  GRN_OBJ_FORMAT_INIT(&format, n_records, 0, limit, offset);
+  format.flags =
+    GRN_OBJ_FORMAT_WITH_COLUMN_NAMES|
+    GRN_OBJ_FORMAT_XML_ELEMENT_RESULTSET;
+  /* TODO: accept only comma separated expr as columns */
+  grn_obj_columns(ctx, target, column_list, column_list_len, &format.columns);
+  GRN_OUTPUT_OBJ(target, &format);
+  GRN_OBJ_FORMAT_FIN(ctx, &format);
+  return NULL;
+}
+
+/*
+ * "each" command.
+ *
+ * Arguments by offset: 0 table, 1 expression (script syntax, updates
+ * allowed).
+ *
+ * Executes the expression once for every record of the table, binding the
+ * record variable to each record ID in turn, then outputs whether ctx->rc
+ * is still clear.
+ */
+static grn_obj *
+command_each(grn_ctx *ctx, int nargs, grn_obj **args,
+             grn_user_data *user_data)
+{
+  const char *table = GRN_TEXT_VALUE(VAR(0));
+  unsigned int table_len = GRN_TEXT_LEN(VAR(0));
+  const char *expr = GRN_TEXT_VALUE(VAR(1));
+  unsigned int expr_len = GRN_TEXT_LEN(VAR(1));
+  grn_obj *table_ = grn_ctx_get_table_by_name_or_id(ctx, table, table_len);
+  if (table_) {
+    grn_obj *v, *expr_;
+    GRN_EXPR_CREATE_FOR_QUERY(ctx, table_, expr_, v);
+    if (expr_) {
+      if (v) {
+        grn_table_cursor *tc;
+        grn_expr_parse(ctx, expr_, expr, expr_len,
+                       NULL, GRN_OP_MATCH, GRN_OP_AND,
+                       GRN_EXPR_SYNTAX_SCRIPT|GRN_EXPR_ALLOW_UPDATE);
+        if ((tc = grn_table_cursor_open(ctx, table_, NULL, 0,
+                                        NULL, 0, 0, -1, 0))) {
+          grn_id id;
+          while ((id = grn_table_cursor_next(ctx, tc)) != GRN_ID_NIL) {
+            GRN_RECORD_SET(ctx, v, id);
+            grn_expr_exec(ctx, expr_, 0);
+          }
+          grn_table_cursor_close(ctx, tc);
+        }
+      }
+      /* Release the expression even if its record variable was not created. */
+      grn_obj_unlink(ctx, expr_);
+    }
+  }
+  GRN_OUTPUT_BOOL(!ctx->rc);
+  return NULL;
+}
+
+/*
+ * "unlink" command: resolves the table named (or identified) by argument 0,
+ * unlinks it when found, and outputs whether ctx->rc is clear.
+ */
+static grn_obj *
+command_unlink(grn_ctx *ctx, int nargs, grn_obj **args,
+               grn_user_data *user_data)
+{
+  grn_obj *target;
+
+  target = grn_ctx_get_table_by_name_or_id(ctx, TEXT_VALUE_LEN(VAR(0)));
+  if (target) {
+    grn_obj_unlink(ctx, target);
+  }
+  GRN_OUTPUT_BOOL(!ctx->rc);
+  return NULL;
+}
+
+/*
+ * "add" command: loads the JSON text in argument 1 ("values") into the
+ * table named by argument 0, outputs the loader's record count as a
+ * boolean, and touches the database of the loader's table when one is set.
+ */
+static grn_obj *
+command_add(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data)
+{
+  grn_obj *table_name = VAR(0);
+  grn_obj *values = VAR(1);
+
+  grn_load_(ctx, GRN_CONTENT_JSON,
+            GRN_TEXT_VALUE(table_name), GRN_TEXT_LEN(table_name),
+            NULL, 0,
+            GRN_TEXT_VALUE(values), GRN_TEXT_LEN(values),
+            NULL, 0, NULL, 0, 0);
+  GRN_OUTPUT_BOOL(ctx->impl->loader.nrecords);
+  if (!ctx->impl->loader.table) {
+    return NULL;
+  }
+  grn_db_touch(ctx, DB_OBJ(ctx->impl->loader.table)->db);
+  return NULL;
+}
+
+/*
+ * "set" command.
+ *
+ * Arguments read here by offset: 0 table, 2 key, 4 output_columns, 5 id.
+ *
+ * Locates a record by key when a key is given, otherwise by ID (checked
+ * with grn_table_at()), and outputs the requested columns of that record.
+ * Raises GRN_INVALID_ARGUMENT when the table name does not resolve.
+ */
+static grn_obj *
+command_set(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data)
+{
+  int table_name_len = GRN_TEXT_LEN(VAR(0));
+  const char *table_name = GRN_TEXT_VALUE(VAR(0));
+  grn_obj *table = grn_ctx_get(ctx, table_name, table_name_len);
+  grn_id id = GRN_ID_NIL;
+  int key_len, id_len;
+
+  if (!table) {
+    ERR(GRN_INVALID_ARGUMENT,
+        "nonexistent table name: <%.*s>", table_name_len, table_name);
+    return NULL;
+  }
+
+  key_len = GRN_TEXT_LEN(VAR(2));
+  id_len = GRN_TEXT_LEN(VAR(5));
+  if (key_len) {
+    const char *key = GRN_TEXT_VALUE(VAR(2));
+    id = grn_table_get(ctx, table, key, key_len);
+  } else {
+    if (id_len) {
+      id = grn_atoui(GRN_TEXT_VALUE(VAR(5)), GRN_BULK_CURR(VAR(5)), NULL);
+    }
+    id = grn_table_at(ctx, table, id);
+  }
+
+  if (id) {
+    grn_obj record;
+    grn_obj_format format;
+    GRN_RECORD_INIT(&record, 0, ((grn_db_obj *)table)->id);
+    GRN_OBJ_FORMAT_INIT(&format, 1, 0, 1, 0);
+    GRN_RECORD_SET(ctx, &record, id);
+    grn_obj_columns(ctx, table,
+                    GRN_TEXT_VALUE(VAR(4)),
+                    GRN_TEXT_LEN(VAR(4)), &format.columns);
+    format.flags = 0 /* GRN_OBJ_FORMAT_WITH_COLUMN_NAMES */;
+    GRN_OUTPUT_OBJ(&record, &format);
+    GRN_OBJ_FORMAT_FIN(ctx, &format);
+  }
+  return NULL;
+}
+
+/*
+ * Resolves the "get" command arguments into a table and a record ID.
+ *
+ * Reads argument 0 (table), 1 (key) and 3 (id). On return *table holds the
+ * object found by name and *id the record ID obtained from the key or the
+ * ID text. Every validation failure raises GRN_INVALID_ARGUMENT; the
+ * function returns ctx->rc, so a non-zero result means an error was raised.
+ *
+ * Rules enforced:
+ *  - the table name must be present and must resolve;
+ *  - NO_KEY tables take an ID only (a key is rejected);
+ *  - HASH/PAT/DAT key tables take either a key or an ID, not both;
+ *  - any other object type is reported as "not a table".
+ */
+static grn_rc
+command_get_resolve_parameters(grn_ctx *ctx, grn_user_data *user_data,
+ grn_obj **table, grn_id *id)
+{
+ const char *table_text, *id_text, *key_text;
+ int table_length, id_length, key_length;
+
+ table_text = GRN_TEXT_VALUE(VAR(0));
+ table_length = GRN_TEXT_LEN(VAR(0));
+ if (table_length == 0) {
+ ERR(GRN_INVALID_ARGUMENT, "[table][get] table isn't specified");
+ return ctx->rc;
+ }
+
+ *table = grn_ctx_get(ctx, table_text, table_length);
+ if (!*table) {
+ ERR(GRN_INVALID_ARGUMENT,
+ "[table][get] table doesn't exist: <%.*s>", table_length, table_text);
+ return ctx->rc;
+ }
+
+ key_text = GRN_TEXT_VALUE(VAR(1));
+ key_length = GRN_TEXT_LEN(VAR(1));
+ id_text = GRN_TEXT_VALUE(VAR(3));
+ id_length = GRN_TEXT_LEN(VAR(3));
+ switch ((*table)->header.type) {
+ case GRN_TABLE_NO_KEY:
+ /* Records of a NO_KEY table can only be addressed by ID. */
+ if (key_length) {
+ ERR(GRN_INVALID_ARGUMENT,
+ "[table][get] should not specify key for NO_KEY table: <%.*s>: "
+ "table: <%.*s>",
+ key_length, key_text,
+ table_length, table_text);
+ return ctx->rc;
+ }
+ if (id_length) {
+ const char *rest = NULL;
+ *id = grn_atoi(id_text, id_text + id_length, &rest);
+ /* No character consumed means the text did not start with a number. */
+ if (rest == id_text) {
+ ERR(GRN_INVALID_ARGUMENT,
+ "[table][get] ID should be a number: <%.*s>: table: <%.*s>",
+ id_length, id_text,
+ table_length, table_text);
+ }
+ } else {
+ ERR(GRN_INVALID_ARGUMENT,
+ "[table][get] ID isn't specified: table: <%.*s>",
+ table_length, table_text);
+ }
+ break;
+ case GRN_TABLE_HASH_KEY:
+ case GRN_TABLE_PAT_KEY:
+ case GRN_TABLE_DAT_KEY:
+ /* Keyed tables accept exactly one of key or ID. */
+ if (key_length && id_length) {
+ ERR(GRN_INVALID_ARGUMENT,
+ "[table][get] should not specify both key and ID: "
+ "key: <%.*s>: ID: <%.*s>: table: <%.*s>",
+ key_length, key_text,
+ id_length, id_text,
+ table_length, table_text);
+ return ctx->rc;
+ }
+ if (key_length) {
+ *id = grn_table_get(ctx, *table, key_text, key_length);
+ if (!*id) {
+ ERR(GRN_INVALID_ARGUMENT,
+ "[table][get] nonexistent key: <%.*s>: table: <%.*s>",
+ key_length, key_text,
+ table_length, table_text);
+ }
+ } else {
+ if (id_length) {
+ const char *rest = NULL;
+ *id = grn_atoi(id_text, id_text + id_length, &rest);
+ /* No character consumed means the text did not start with a number. */
+ if (rest == id_text) {
+ ERR(GRN_INVALID_ARGUMENT,
+ "[table][get] ID should be a number: <%.*s>: table: <%.*s>",
+ id_length, id_text,
+ table_length, table_text);
+ }
+ } else {
+ ERR(GRN_INVALID_ARGUMENT,
+ "[table][get] key nor ID isn't specified: table: <%.*s>",
+ table_length, table_text);
+ }
+ }
+ break;
+ default:
+ ERR(GRN_INVALID_ARGUMENT,
+ "[table][get] not a table: <%.*s>", table_length, table_text);
+ break;
+ }
+
+ /* Errors raised above without an early return surface through ctx->rc. */
+ return ctx->rc;
+}
+
+/*
+ * "get" command: resolves the table and record via
+ * command_get_resolve_parameters() and, when that succeeds, outputs the
+ * columns named by argument 2 (output_columns) for that record inside a
+ * "RESULT" array, with column names included.
+ */
+static grn_obj *
+command_get(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data)
+{
+  grn_obj *table = NULL;
+  grn_id id = GRN_ID_NIL;
+  grn_obj record;
+  grn_obj_format format;
+
+  if (command_get_resolve_parameters(ctx, user_data, &table, &id)) {
+    /* An error has already been raised; output nothing. */
+    return NULL;
+  }
+
+  GRN_OUTPUT_ARRAY_OPEN("RESULT", 2);
+  GRN_RECORD_INIT(&record, 0, ((grn_db_obj *)table)->id);
+  GRN_OBJ_FORMAT_INIT(&format, 1, 0, 1, 0);
+  GRN_RECORD_SET(ctx, &record, id);
+  grn_obj_columns(ctx, table, GRN_TEXT_VALUE(VAR(2)), GRN_TEXT_LEN(VAR(2)),
+                  &format.columns);
+  format.flags =
+    GRN_OBJ_FORMAT_WITH_COLUMN_NAMES |
+    GRN_OBJ_FORMAT_XML_ELEMENT_RESULTSET;
+  GRN_OUTPUT_OBJ(&record, &format);
+  GRN_OBJ_FORMAT_FIN(ctx, &format);
+  GRN_OUTPUT_ARRAY_CLOSE();
+  return NULL;
+}
+
+/*
+ * "push" command: loads the JSON text in argument 1 ("values") into the
+ * queue-backed table named by argument 0.
+ *
+ * Only GRN_TABLE_NO_KEY tables for which grn_array_queue() returns a queue
+ * are accepted; other tables raise GRN_OPERATION_NOT_SUPPORTED and an
+ * unknown table name raises GRN_INVALID_ARGUMENT. On success the loader's
+ * record count is output as a boolean.
+ */
+static grn_obj *
+command_push(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data)
+{
+ grn_obj *table = grn_ctx_get(ctx, GRN_TEXT_VALUE(VAR(0)), GRN_TEXT_LEN(VAR(0)));
+ if (table) {
+ switch (table->header.type) {
+ case GRN_TABLE_NO_KEY:
+ {
+ grn_array *array = (grn_array *)table;
+ grn_table_queue *queue = grn_array_queue(ctx, array);
+ if (queue) {
+ /* Head/tail bookkeeping and the load itself run under the queue mutex. */
+ MUTEX_LOCK(queue->mutex);
+ /* Head has reached the capacity: reset the array's current record
+ position (presumably so the load wraps around -- confirm). */
+ if (grn_table_queue_head(queue) == queue->cap) {
+ grn_array_clear_curr_rec(ctx, array);
+ }
+ grn_load_(ctx, GRN_CONTENT_JSON,
+ GRN_TEXT_VALUE(VAR(0)), GRN_TEXT_LEN(VAR(0)),
+ NULL, 0,
+ GRN_TEXT_VALUE(VAR(1)), GRN_TEXT_LEN(VAR(1)),
+ NULL, 0, NULL, 0, 0);
+ /* Queue already holds cap entries: advance the tail as well
+ (presumably dropping the oldest entry -- confirm). */
+ if (grn_table_queue_size(queue) == queue->cap) {
+ grn_table_queue_tail_increment(queue);
+ }
+ grn_table_queue_head_increment(queue);
+ /* Wake a waiter blocked on the queue condition (see command_pull). */
+ COND_SIGNAL(queue->cond);
+ MUTEX_UNLOCK(queue->mutex);
+ GRN_OUTPUT_BOOL(ctx->impl->loader.nrecords);
+ if (ctx->impl->loader.table) {
+ grn_db_touch(ctx, DB_OBJ(ctx->impl->loader.table)->db);
+ }
+ } else {
+ ERR(GRN_OPERATION_NOT_SUPPORTED, "table '%.*s' doesn't support push",
+ (int)GRN_TEXT_LEN(VAR(0)), GRN_TEXT_VALUE(VAR(0)));
+ }
+ }
+ break;
+ default :
+ ERR(GRN_OPERATION_NOT_SUPPORTED, "table '%.*s' doesn't support push",
+ (int)GRN_TEXT_LEN(VAR(0)), GRN_TEXT_VALUE(VAR(0)));
+ }
+ } else {
+ ERR(GRN_INVALID_ARGUMENT, "table '%.*s' does not exist.",
+ (int)GRN_TEXT_LEN(VAR(0)), GRN_TEXT_VALUE(VAR(0)));
+ }
+ return NULL;
+}
+
+/*
+ * "pull" command: takes the next record from the queue-backed table named
+ * by argument 0 and outputs the columns named by argument 1
+ * (output_columns).
+ *
+ * While the queue is empty the call waits on the queue condition, unless
+ * argument 2 (non_block) is non-empty, in which case false is output and
+ * the call returns at once. Tables without a queue raise
+ * GRN_OPERATION_NOT_SUPPORTED; an unknown table name raises
+ * GRN_INVALID_ARGUMENT.
+ */
+static grn_obj *
+command_pull(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data)
+{
+ grn_obj *table = grn_ctx_get(ctx, GRN_TEXT_VALUE(VAR(0)), GRN_TEXT_LEN(VAR(0)));
+ if (table) {
+ switch (table->header.type) {
+ case GRN_TABLE_NO_KEY:
+ {
+ grn_array *array = (grn_array *)table;
+ grn_table_queue *queue = grn_array_queue(ctx, array);
+ if (queue) {
+ MUTEX_LOCK(queue->mutex);
+ while (grn_table_queue_size(queue) == 0) {
+ /* Any non-empty non_block value disables waiting. */
+ if (GRN_TEXT_LEN(VAR(2))) {
+ MUTEX_UNLOCK(queue->mutex);
+ GRN_OUTPUT_BOOL(0);
+ return NULL;
+ }
+ COND_WAIT(queue->cond, queue->mutex);
+ }
+ /* Advance the tail first; the record at the new tail is the one output. */
+ grn_table_queue_tail_increment(queue);
+ {
+ grn_obj obj;
+ grn_obj_format format;
+ GRN_RECORD_INIT(&obj, 0, ((grn_db_obj *)table)->id);
+ GRN_OBJ_FORMAT_INIT(&format, 1, 0, 1, 0);
+ GRN_RECORD_SET(ctx, &obj, grn_table_queue_tail(queue));
+ grn_obj_columns(ctx, table, GRN_TEXT_VALUE(VAR(1)), GRN_TEXT_LEN(VAR(1)),
+ &format.columns);
+ format.flags = 0 /* GRN_OBJ_FORMAT_WITH_COLUMN_NAMES */;
+ GRN_OUTPUT_OBJ(&obj, &format);
+ GRN_OBJ_FORMAT_FIN(ctx, &format);
+ }
+ /* The mutex is held until the record has been written to the output. */
+ MUTEX_UNLOCK(queue->mutex);
+ } else {
+ ERR(GRN_OPERATION_NOT_SUPPORTED, "table '%.*s' doesn't support pull",
+ (int)GRN_TEXT_LEN(VAR(0)), GRN_TEXT_VALUE(VAR(0)));
+ }
+ }
+ break;
+ default :
+ ERR(GRN_OPERATION_NOT_SUPPORTED, "table '%.*s' doesn't support pull",
+ (int)GRN_TEXT_LEN(VAR(0)), GRN_TEXT_VALUE(VAR(0)));
+ }
+ } else {
+ ERR(GRN_INVALID_ARGUMENT, "table '%.*s' does not exist.",
+ (int)GRN_TEXT_LEN(VAR(0)), GRN_TEXT_VALUE(VAR(0)));
+ }
+ return NULL;
+}
+
+/* Plugin initialization entry point: nothing to prepare, always succeeds. */
+grn_rc
+GRN_PLUGIN_INIT(grn_ctx *ctx)
+{
+  (void)ctx; /* intentionally unused */
+  return GRN_SUCCESS;
+}
+
+/*
+ * Registers every command of the table plugin.
+ *
+ * The same vars[] array is refilled for each command; the count passed to
+ * grn_plugin_command_create() decides how many leading entries that
+ * command takes. The position of a name in vars[] is the offset the
+ * command implementation reads it back with via VAR(n).
+ * Returns ctx->rc as left by the registration calls.
+ */
+grn_rc
+GRN_PLUGIN_REGISTER(grn_ctx *ctx)
+{
+ /* Scratch array; at most 7 entries are used by any command below. */
+ grn_expr_var vars[18];
+
+ /* filter_by_script table expression result_set set_operation allow_update */
+ grn_plugin_expr_var_init(ctx, &vars[0], "table", -1);
+ grn_plugin_expr_var_init(ctx, &vars[1], "expression", -1);
+ grn_plugin_expr_var_init(ctx, &vars[2], "result_set", -1);
+ grn_plugin_expr_var_init(ctx, &vars[3], "set_operation", -1);
+ grn_plugin_expr_var_init(ctx, &vars[4], "allow_update", -1);
+ grn_plugin_command_create(ctx, "filter_by_script", -1, command_filter_by_script, 5, vars);
+
+ /* filter table column operator value result_set set_operation */
+ grn_plugin_expr_var_init(ctx, &vars[0], "table", -1);
+ grn_plugin_expr_var_init(ctx, &vars[1], "column", -1);
+ grn_plugin_expr_var_init(ctx, &vars[2], "operator", -1);
+ grn_plugin_expr_var_init(ctx, &vars[3], "value", -1);
+ grn_plugin_expr_var_init(ctx, &vars[4], "result_set", -1);
+ grn_plugin_expr_var_init(ctx, &vars[5], "set_operation", -1);
+ grn_plugin_command_create(ctx, "filter", -1, command_filter, 6, vars);
+
+ /* group table key result_set range_gap */
+ grn_plugin_expr_var_init(ctx, &vars[0], "table", -1);
+ grn_plugin_expr_var_init(ctx, &vars[1], "key", -1);
+ grn_plugin_expr_var_init(ctx, &vars[2], "result_set", -1);
+ grn_plugin_expr_var_init(ctx, &vars[3], "range_gap", -1);
+ grn_plugin_command_create(ctx, "group", -1, command_group, 4, vars);
+
+ /* sort table keys offset limit */
+ grn_plugin_expr_var_init(ctx, &vars[0], "table", -1);
+ grn_plugin_expr_var_init(ctx, &vars[1], "keys", -1);
+ grn_plugin_expr_var_init(ctx, &vars[2], "offset", -1);
+ grn_plugin_expr_var_init(ctx, &vars[3], "limit", -1);
+ grn_plugin_command_create(ctx, "sort", -1, command_sort, 4, vars);
+
+ /* output table columns offset limit */
+ grn_plugin_expr_var_init(ctx, &vars[0], "table", -1);
+ grn_plugin_expr_var_init(ctx, &vars[1], "columns", -1);
+ grn_plugin_expr_var_init(ctx, &vars[2], "offset", -1);
+ grn_plugin_expr_var_init(ctx, &vars[3], "limit", -1);
+ grn_plugin_command_create(ctx, "output", -1, command_output, 4, vars);
+
+ /* each table expression */
+ grn_plugin_expr_var_init(ctx, &vars[0], "table", -1);
+ grn_plugin_expr_var_init(ctx, &vars[1], "expression", -1);
+ grn_plugin_command_create(ctx, "each", -1, command_each, 2, vars);
+
+ /* unlink table */
+ grn_plugin_expr_var_init(ctx, &vars[0], "table", -1);
+ grn_plugin_command_create(ctx, "unlink", -1, command_unlink, 1, vars);
+
+ /* add, push and set share one variable list: add and push take only the
+ first two entries (table, values), set takes all six. */
+ grn_plugin_expr_var_init(ctx, &vars[0], "table", -1);
+ grn_plugin_expr_var_init(ctx, &vars[1], "values", -1);
+ grn_plugin_expr_var_init(ctx, &vars[2], "key", -1);
+ grn_plugin_expr_var_init(ctx, &vars[3], "columns", -1);
+ grn_plugin_expr_var_init(ctx, &vars[4], "output_columns", -1);
+ grn_plugin_expr_var_init(ctx, &vars[5], "id", -1);
+ grn_plugin_command_create(ctx, "add", -1, command_add, 2, vars);
+ grn_plugin_command_create(ctx, "push", -1, command_push, 2, vars);
+ grn_plugin_command_create(ctx, "set", -1, command_set, 6, vars);
+
+ /* get table key output_columns id */
+ grn_plugin_expr_var_init(ctx, &vars[0], "table", -1);
+ grn_plugin_expr_var_init(ctx, &vars[1], "key", -1);
+ grn_plugin_expr_var_init(ctx, &vars[2], "output_columns", -1);
+ grn_plugin_expr_var_init(ctx, &vars[3], "id", -1);
+ grn_plugin_command_create(ctx, "get", -1, command_get, 4, vars);
+
+ /* pull table output_columns non_block */
+ grn_plugin_expr_var_init(ctx, &vars[0], "table", -1);
+ grn_plugin_expr_var_init(ctx, &vars[1], "output_columns", -1);
+ grn_plugin_expr_var_init(ctx, &vars[2], "non_block", -1);
+ grn_plugin_command_create(ctx, "pull", -1, command_pull, 3, vars);
+
+ /* match table columns query result_set set_operation
+ allow_column_expression allow_pragma */
+ grn_plugin_expr_var_init(ctx, &vars[0], "table", -1);
+ grn_plugin_expr_var_init(ctx, &vars[1], "columns", -1);
+ grn_plugin_expr_var_init(ctx, &vars[2], "query", -1);
+ grn_plugin_expr_var_init(ctx, &vars[3], "result_set", -1);
+ grn_plugin_expr_var_init(ctx, &vars[4], "set_operation", -1);
+ grn_plugin_expr_var_init(ctx, &vars[5], "allow_column_expression", -1);
+ grn_plugin_expr_var_init(ctx, &vars[6], "allow_pragma", -1);
+ grn_plugin_command_create(ctx, "match", -1, command_match, 7, vars);
+
+ return ctx->rc;
+}
+
+/* Plugin finalization entry point: nothing to release, always succeeds. */
+grn_rc
+GRN_PLUGIN_FIN(grn_ctx *ctx)
+{
+  (void)ctx; /* intentionally unused */
+  return GRN_SUCCESS;
+}
diff --git a/storage/mroonga/vendor/groonga/plugins/tokenizers/CMakeLists.txt b/storage/mroonga/vendor/groonga/plugins/tokenizers/CMakeLists.txt
new file mode 100644
index 00000000000..e044c1fbb93
--- /dev/null
+++ b/storage/mroonga/vendor/groonga/plugins/tokenizers/CMakeLists.txt
@@ -0,0 +1,53 @@
+# Copyright(C) 2012-2013 Brazil
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License version 2.1 as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+
+# Header search path: the lib directory two levels up from this one.
+include_directories(
+ ${CMAKE_CURRENT_SOURCE_DIR}/../../lib
+ )
+
+# Install destination shared by both tokenizer plugins.
+set(TOKENIZERS_DIR "${GRN_RELATIVE_PLUGINS_DIR}/tokenizers")
+# MeCab tokenizer: built only when GRN_WITH_MECAB is set. The target is
+# named mecab_tokenizer but the module file is emitted as "mecab" with no
+# prefix.
+if(GRN_WITH_MECAB)
+ read_file_list(${CMAKE_CURRENT_SOURCE_DIR}/mecab_sources.am MECAB_SOURCES)
+ include_directories(${MECAB_INCLUDE_DIRS})
+ link_directories(${MECAB_LIBRARY_DIRS})
+ add_library(mecab_tokenizer MODULE ${MECAB_SOURCES})
+ set_source_files_properties(${MECAB_SOURCES}
+ PROPERTIES
+ COMPILE_FLAGS "${GRN_C_COMPILE_FLAGS}")
+ set_target_properties(mecab_tokenizer PROPERTIES
+ PREFIX ""
+ OUTPUT_NAME "mecab")
+ target_link_libraries(mecab_tokenizer libgroonga ${MECAB_LIBRARIES})
+ if(NOT MRN_GROONGA_BUNDLED)
+ install(TARGETS mecab_tokenizer DESTINATION "${TOKENIZERS_DIR}")
+ endif()
+endif()
+
+# KyTea tokenizer: built only when GRN_WITH_KYTEA is set. Same layout as
+# the MeCab block, except that its sources use the C++ compile flags.
+if(GRN_WITH_KYTEA)
+ read_file_list(${CMAKE_CURRENT_SOURCE_DIR}/kytea_sources.am KYTEA_SOURCES)
+ include_directories(${KYTEA_INCLUDE_DIRS})
+ link_directories(${KYTEA_LIBRARY_DIRS})
+ add_library(kytea_tokenizer MODULE ${KYTEA_SOURCES})
+ set_source_files_properties(${KYTEA_SOURCES}
+ PROPERTIES
+ COMPILE_FLAGS "${GRN_CXX_COMPILE_FLAGS}")
+ set_target_properties(kytea_tokenizer PROPERTIES
+ PREFIX ""
+ OUTPUT_NAME "kytea")
+ target_link_libraries(kytea_tokenizer libgroonga ${KYTEA_LIBRARIES})
+ if(NOT MRN_GROONGA_BUNDLED)
+ install(TARGETS kytea_tokenizer DESTINATION "${TOKENIZERS_DIR}")
+ endif()
+endif()
diff --git a/storage/mroonga/vendor/groonga/plugins/tokenizers/Makefile.am b/storage/mroonga/vendor/groonga/plugins/tokenizers/Makefile.am
new file mode 100644
index 00000000000..386b1554f73
--- /dev/null
+++ b/storage/mroonga/vendor/groonga/plugins/tokenizers/Makefile.am
@@ -0,0 +1,33 @@
+# Also ship the CMake build description in the distribution.
+EXTRA_DIST = \
+ CMakeLists.txt
+
+# Include paths: build tree, public headers, and the lib directory.
+AM_CPPFLAGS = \
+ -I$(top_builddir) \
+ -I$(top_srcdir)/include \
+ -I$(top_srcdir)/lib
+
+# libtool flags for a dlopen-able module without a version suffix.
+AM_LDFLAGS = \
+ -avoid-version \
+ -module \
+ -no-undefined
+
+# Library every tokenizer plugin is linked against.
+LIBS = \
+ $(top_builddir)/lib/libgroonga.la
+
+# Each tokenizer is added to the build only when its conditional is enabled.
+tokenizers_plugins_LTLIBRARIES =
+if WITH_MECAB
+tokenizers_plugins_LTLIBRARIES += mecab.la
+endif
+if WITH_KYTEA
+tokenizers_plugins_LTLIBRARIES += kytea.la
+endif
+
+# Per-plugin settings: the shared flags above plus the tokenizer library's own.
+include mecab_sources.am
+mecab_la_CPPFLAGS = $(AM_CPPFLAGS) $(MECAB_CPPFLAGS)
+mecab_la_LIBADD = $(LIBS) $(MECAB_LIBS)
+mecab_la_LDFLAGS = $(AM_LDFLAGS) $(MECAB_LDFLAGS)
+
+include kytea_sources.am
+kytea_la_CPPFLAGS = $(AM_CPPFLAGS) $(KYTEA_CFLAGS)
+kytea_la_LIBADD = $(LIBS) $(KYTEA_LIBS)
+kytea_la_LDFLAGS = $(AM_LDFLAGS) $(KYTEA_LDFLAGS)
diff --git a/storage/mroonga/vendor/groonga/plugins/tokenizers/kytea.cpp b/storage/mroonga/vendor/groonga/plugins/tokenizers/kytea.cpp
new file mode 100644
index 00000000000..a7ee4104592
--- /dev/null
+++ b/storage/mroonga/vendor/groonga/plugins/tokenizers/kytea.cpp
@@ -0,0 +1,354 @@
+/* -*- c-basic-offset: 2 -*- */
+/* Copyright(C) 2012 Brazil
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License version 2.1 as published by the Free Software Foundation.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+*/
+
+#include <groonga/tokenizer.h>
+
+#include <kytea/kytea.h>
+#include <kytea/string-util.h>
+
+#include <string.h>
+
+#include <string>
+#include <vector>
+
+namespace {
+
+/*
+  Plugin-wide KyTea state. kytea_init() fills these pointers in and
+  kytea_fin() resets every one of them to NULL.
+ */
+grn_plugin_mutex *kytea_mutex = NULL;
+kytea::KyteaConfig *kytea_config = NULL;
+kytea::Kytea *kytea_tagger = NULL;
+/* Obtained from kytea_tagger; kytea_fin() only clears it, it is not freed. */
+kytea::StringUtil *kytea_util = NULL;
+
+/* kytea_init() calls kytea_fin() on its failure paths, hence the prototypes. */
+void kytea_init(grn_ctx *ctx);
+void kytea_fin(grn_ctx *ctx);
+
+/*
+  Creates the plugin-wide mutex, kytea::KyteaConfig, kytea::Kytea tagger and
+  the StringUtil pointer. When any step fails, everything created so far is
+  released through kytea_fin() and the failure is reported with
+  GRN_PLUGIN_ERROR(). Calling it while any of the globals is still set is
+  reported as an error and changes nothing.
+ */
+void kytea_init(grn_ctx *ctx) {
+  const bool already_initialized =
+    kytea_mutex || kytea_config || kytea_tagger || kytea_util;
+  if (already_initialized) {
+    GRN_PLUGIN_ERROR(ctx, GRN_TOKENIZER_ERROR,
+                     "[tokenizer][kytea] "
+                     "TokenKytea is already initialized");
+    return;
+  }
+
+  /* Step 1: the mutex used to serialize access to the tagger. */
+  kytea_mutex = grn_plugin_mutex_open(ctx);
+  if (kytea_mutex == NULL) {
+    kytea_fin(ctx);
+    GRN_PLUGIN_ERROR(ctx, GRN_NO_MEMORY_AVAILABLE,
+                     "[tokenizer][kytea] "
+                     "grn_plugin_mutex_open() failed");
+    return;
+  }
+
+  /* Step 2: the configuration, built with placement new in plugin memory. */
+  kytea::KyteaConfig * const config_storage =
+    static_cast<kytea::KyteaConfig *>(
+      GRN_PLUGIN_MALLOC(ctx, sizeof(kytea::KyteaConfig)));
+  if (config_storage == NULL) {
+    kytea_fin(ctx);
+    GRN_PLUGIN_ERROR(ctx, GRN_NO_MEMORY_AVAILABLE,
+                     "[tokenizer][kytea] "
+                     "memory allocation to kytea::KyteaConfig failed");
+    return;
+  }
+
+  try {
+    new (config_storage) kytea::KyteaConfig;
+    /* From here on kytea_fin() destroys and frees the configuration. */
+    kytea_config = config_storage;
+    try {
+      kytea_config->setDebug(0);
+      kytea_config->setOnTraining(false);
+      kytea_config->parseRunCommandLine(0, NULL);
+    } catch (...) {
+      kytea_fin(ctx);
+      GRN_PLUGIN_ERROR(ctx, GRN_TOKENIZER_ERROR,
+                       "[tokenizer][kytea] "
+                       "kytea::KyteaConfig settings failed");
+      return;
+    }
+  } catch (...) {
+    /* The storage was not yet published in kytea_config: free it directly. */
+    GRN_PLUGIN_FREE(ctx, config_storage);
+    kytea_fin(ctx);
+    GRN_PLUGIN_ERROR(ctx, GRN_TOKENIZER_ERROR,
+                     "[tokenizer][kytea] "
+                     "kytea::KyteaConfig initialization failed");
+    return;
+  }
+
+  /* Step 3: the tagger, built the same way, then loaded with the model. */
+  kytea::Kytea * const tagger_storage = static_cast<kytea::Kytea *>(
+    GRN_PLUGIN_MALLOC(ctx, sizeof(kytea::Kytea)));
+  if (tagger_storage == NULL) {
+    kytea_fin(ctx);
+    GRN_PLUGIN_ERROR(ctx, GRN_NO_MEMORY_AVAILABLE,
+                     "[tokenizer][kytea] "
+                     "memory allocation to kytea::Kytea failed");
+    return;
+  }
+
+  try {
+    new (tagger_storage) kytea::Kytea;
+    /* From here on kytea_fin() destroys and frees the tagger. */
+    kytea_tagger = tagger_storage;
+    try {
+      kytea_tagger->readModel(kytea_config->getModelFile().c_str());
+    } catch (...) {
+      kytea_fin(ctx);
+      GRN_PLUGIN_ERROR(ctx, GRN_TOKENIZER_ERROR,
+                       "[tokenizer][kytea] "
+                       "kytea::Kytea::readModel() failed");
+      return;
+    }
+  } catch (...) {
+    /* The storage was not yet published in kytea_tagger: free it directly. */
+    GRN_PLUGIN_FREE(ctx, tagger_storage);
+    kytea_fin(ctx);
+    GRN_PLUGIN_ERROR(ctx, GRN_TOKENIZER_ERROR,
+                     "[tokenizer][kytea] "
+                     "kytea::Kytea initialization failed");
+    return;
+  }
+
+  /* Step 4: the string utility handed out by the tagger. */
+  try {
+    kytea_util = kytea_tagger->getStringUtil();
+  } catch (...) {
+    kytea_fin(ctx);
+    GRN_PLUGIN_ERROR(ctx, GRN_TOKENIZER_ERROR,
+                     "[tokenizer][kytea] "
+                     "kytea::Kytea::getStringUtil() failed");
+    return;
+  }
+}
+
+/*
+  Releases the plugin-wide KyTea state: tagger first, then configuration,
+  then mutex. Each pointer is reset to NULL, and pointers that are already
+  NULL are skipped, so a partially initialized state can be passed through
+  here as well.
+ */
+void kytea_fin(grn_ctx *ctx) {
+  /* Only the pointer is cleared; nothing is freed for the string utility. */
+  kytea_util = NULL;
+
+  if (kytea_tagger != NULL) {
+    /* Built with placement new, so destroy explicitly before freeing. */
+    kytea_tagger->~Kytea();
+    GRN_PLUGIN_FREE(ctx, kytea_tagger);
+    kytea_tagger = NULL;
+  }
+
+  if (kytea_config != NULL) {
+    kytea_config->~KyteaConfig();
+    GRN_PLUGIN_FREE(ctx, kytea_config);
+    kytea_config = NULL;
+  }
+
+  if (kytea_mutex != NULL) {
+    grn_plugin_mutex_close(ctx, kytea_mutex);
+    kytea_mutex = NULL;
+  }
+}
+
+/*
+  State of one tokenization. It is constructed with placement new by
+  grn_tokenizer_kytea_init() and destroyed by grn_tokenizer_kytea_fin().
+ */
+struct grn_tokenizer_kytea {
+  /* Query being tokenized; closed by grn_tokenizer_kytea_fin() when set. */
+  grn_tokenizer_query *query;
+  /* Sentence on which KyTea word segmentation is calculated. */
+  kytea::KyteaSentence sentence;
+  /* Tokens collected by grn_kytea_init() and handed out by grn_kytea_next(). */
+  std::vector<std::string> tokens;
+  /* Index into `tokens' of the next token to hand out. */
+  std::size_t id;
+  grn_tokenizer_token token;
+  /* Unconsumed input; used only when the query has tokenized delimiters. */
+  const char *rest_query_string;
+  unsigned int rest_query_string_length;
+
+  grn_tokenizer_kytea() :
+    query(NULL),
+    sentence(),
+    tokens(),
+    id(0),
+    token(),
+    rest_query_string(NULL),
+    /* Keep the length consistent with the NULL rest string above. */
+    rest_query_string_length(0)
+  {
+  }
+  ~grn_tokenizer_kytea() {}
+};
+
+/*
+  Constructs a grn_tokenizer_kytea in the raw storage `tokenizer' points to
+  and then initializes its token member.
+ */
+void grn_tokenizer_kytea_init(grn_ctx *ctx, grn_tokenizer_kytea *tokenizer) {
+  grn_tokenizer_kytea * const constructed = new (tokenizer) grn_tokenizer_kytea;
+  grn_tokenizer_token_init(ctx, &(constructed->token));
+}
+
+/*
+  Finalizes the token member, closes the attached query (if any) and runs
+  the destructor. The storage itself is not freed here.
+ */
+void grn_tokenizer_kytea_fin(grn_ctx *ctx, grn_tokenizer_kytea *tokenizer) {
+  grn_tokenizer_token_fin(ctx, &(tokenizer->token));
+
+  grn_tokenizer_query * const attached_query = tokenizer->query;
+  if (attached_query != NULL) {
+    grn_tokenizer_query_close(ctx, attached_query);
+  }
+
+  tokenizer->~grn_tokenizer_kytea();
+}
+
+/*
+  Starts one tokenization: opens the query, allocates the per-tokenization
+  state and, unless the query carries tokenized delimiters, runs KyTea word
+  segmentation and collects the resulting tokens.
+
+  On success the state is stored in `user_data->ptr'. On failure an error is
+  reported with GRN_PLUGIN_ERROR() and everything acquired here is released
+  before returning, because `user_data->ptr' has not been set yet and
+  grn_kytea_fin() therefore has nothing to clean up from.
+
+  The return value is always NULL.
+ */
+grn_obj *grn_kytea_init(grn_ctx *ctx, int num_args, grn_obj **args,
+                        grn_user_data *user_data) {
+  unsigned int normalizer_flags = 0;
+  grn_tokenizer_query * const query =
+    grn_tokenizer_query_open(ctx, num_args, args, normalizer_flags);
+  if (!query) {
+    return NULL;
+  }
+
+  grn_tokenizer_kytea * const tokenizer = static_cast<grn_tokenizer_kytea *>(
+    GRN_PLUGIN_MALLOC(ctx, sizeof(grn_tokenizer_kytea)));
+  if (!tokenizer) {
+    grn_tokenizer_query_close(ctx, query);
+    GRN_PLUGIN_ERROR(ctx, GRN_NO_MEMORY_AVAILABLE,
+                     "[tokenizer][kytea] "
+                     "memory allocation to grn_tokenizer_kytea failed");
+    return NULL;
+  }
+
+  try {
+    grn_tokenizer_kytea_init(ctx, tokenizer);
+  } catch (...) {
+    /* Construction did not complete, so only the raw storage is released. */
+    GRN_PLUGIN_FREE(ctx, tokenizer);
+    grn_tokenizer_query_close(ctx, query);
+    GRN_PLUGIN_ERROR(ctx, GRN_TOKENIZER_ERROR,
+                     "[tokenizer][kytea] "
+                     "tokenizer initialization failed");
+    return NULL;
+  }
+
+  /* From here on grn_tokenizer_kytea_fin() also closes the query. */
+  tokenizer->query = query;
+
+  grn_obj *normalized_query = query->normalized_query;
+  const char *normalized_string;
+  unsigned int normalized_string_length;
+  grn_string_get_normalized(ctx,
+                            normalized_query,
+                            &normalized_string,
+                            &normalized_string_length,
+                            NULL);
+  if (tokenizer->query->have_tokenized_delimiter) {
+    tokenizer->rest_query_string = normalized_string;
+    tokenizer->rest_query_string_length = normalized_string_length;
+  } else {
+    /* The shared tagger and string utility are used under the mutex. */
+    grn_plugin_mutex_lock(ctx, kytea_mutex);
+    try {
+      const std::string str(normalized_string, normalized_string_length);
+      const kytea::KyteaString &surface_str = kytea_util->mapString(str);
+      const kytea::KyteaString &normalized_str = kytea_util->normalize(surface_str);
+      tokenizer->sentence = kytea::KyteaSentence(surface_str, normalized_str);
+      kytea_tagger->calculateWS(tokenizer->sentence);
+    } catch (...) {
+      grn_plugin_mutex_unlock(ctx, kytea_mutex);
+      /* Release the state (and the query it owns) instead of leaking it. */
+      grn_tokenizer_kytea_fin(ctx, tokenizer);
+      GRN_PLUGIN_FREE(ctx, tokenizer);
+      GRN_PLUGIN_ERROR(ctx, GRN_TOKENIZER_ERROR,
+                       "[tokenizer][kytea] "
+                       "tokenization failed");
+      return NULL;
+    }
+    grn_plugin_mutex_unlock(ctx, kytea_mutex);
+
+    try {
+      for (std::size_t i = 0; i < tokenizer->sentence.words.size(); ++i) {
+        const std::string &token =
+            kytea_util->showString(tokenizer->sentence.words[i].surface);
+        const char *ptr = token.c_str();
+        unsigned int left = static_cast<unsigned int>(token.length());
+        /* Scan the word; stop early at an invalid or a space character. */
+        while (left > 0) {
+          const int char_length =
+              grn_tokenizer_charlen(ctx, ptr, left, query->encoding);
+          if ((char_length == 0) ||
+              (grn_tokenizer_isspace(ctx, ptr, left, query->encoding) != 0)) {
+            break;
+          }
+          ptr += char_length;
+          left -= char_length;
+        }
+        /* Only words that were scanned to their end are kept as tokens. */
+        if (left == 0) {
+          tokenizer->tokens.push_back(token);
+        }
+      }
+    } catch (...) {
+      /* Release the state (and the query it owns) instead of leaking it. */
+      grn_tokenizer_kytea_fin(ctx, tokenizer);
+      GRN_PLUGIN_FREE(ctx, tokenizer);
+      GRN_PLUGIN_ERROR(ctx, GRN_TOKENIZER_ERROR,
+                       "[tokenizer][kytea] "
+                       "adjustment failed");
+      return NULL;
+    }
+  }
+
+  user_data->ptr = tokenizer;
+  return NULL;
+}
+
+/*
+  Pushes the next token of the tokenization stored in `user_data->ptr'.
+  The return value is always NULL.
+ */
+grn_obj *grn_kytea_next(grn_ctx *ctx, int num_args, grn_obj **args,
+                        grn_user_data *user_data) {
+  grn_tokenizer_kytea * const tokenizer =
+    static_cast<grn_tokenizer_kytea *>(user_data->ptr);
+
+  if (!tokenizer->query->have_tokenized_delimiter) {
+    /* Hand out the tokens collected by grn_kytea_init() one by one. */
+    const std::size_t n_tokens = tokenizer->tokens.size();
+    const grn_tokenizer_status status =
+      ((tokenizer->id + 1) < n_tokens) ?
+      GRN_TOKENIZER_CONTINUE : GRN_TOKENIZER_LAST;
+    if (tokenizer->id >= n_tokens) {
+      /* Nothing left: push an empty token. */
+      grn_tokenizer_token_push(ctx, &tokenizer->token, "", 0, status);
+    } else {
+      const std::string &current = tokenizer->tokens[tokenizer->id++];
+      grn_tokenizer_token_push(ctx, &tokenizer->token,
+                               current.c_str(), current.length(), status);
+    }
+    return NULL;
+  }
+
+  /* Delimiter mode: split the remaining input at the tokenized delimiter. */
+  const unsigned int remaining_length = tokenizer->rest_query_string_length;
+  const char * const next_rest =
+    grn_tokenizer_tokenized_delimiter_next(ctx,
+                                           &(tokenizer->token),
+                                           tokenizer->rest_query_string,
+                                           remaining_length,
+                                           tokenizer->query->encoding);
+  if (next_rest) {
+    /* Shrink the remaining length by the number of bytes just consumed. */
+    tokenizer->rest_query_string_length -=
+      next_rest - tokenizer->rest_query_string;
+  }
+  tokenizer->rest_query_string = next_rest;
+
+  return NULL;
+}
+
+/*
+  Ends a tokenization: finalizes and frees the state stored in
+  `user_data->ptr', if there is one. The return value is always NULL.
+ */
+grn_obj *grn_kytea_fin(grn_ctx *ctx, int num_args, grn_obj **args,
+                       grn_user_data *user_data) {
+  grn_tokenizer_kytea * const tokenizer =
+    static_cast<grn_tokenizer_kytea *>(user_data->ptr);
+  if (!tokenizer) {
+    return NULL;
+  }
+
+  grn_tokenizer_kytea_fin(ctx, tokenizer);
+  GRN_PLUGIN_FREE(ctx, tokenizer);
+  return NULL;
+}
+
+} // namespace
+
+extern "C" {
+
+/*
+  GRN_PLUGIN_INIT() initializes this plugin by calling kytea_init(). The
+  result is whatever code kytea_init() left in `ctx->rc', so a failure must
+  be recorded there.
+ */
+grn_rc GRN_PLUGIN_INIT(grn_ctx *ctx) {
+  kytea_init(ctx);
+
+  const grn_rc rc = ctx->rc;
+  return rc;
+}
+
+/*
+  GRN_PLUGIN_REGISTER() registers the tokenizer with the database associated
+  with `ctx' under the name "TokenKytea", together with the three functions
+  called for tokenization.
+ */
+grn_rc GRN_PLUGIN_REGISTER(grn_ctx *ctx) {
+  const grn_rc rc = grn_tokenizer_register(ctx, "TokenKytea", 10,
+                                           grn_kytea_init,
+                                           grn_kytea_next,
+                                           grn_kytea_fin);
+  return rc;
+}
+
+/*
+  GRN_PLUGIN_FIN() finalizes the plugin that GRN_PLUGIN_INIT() initialized.
+  It always reports GRN_SUCCESS.
+ */
+grn_rc GRN_PLUGIN_FIN(grn_ctx *ctx) {
+  kytea_fin(ctx);
+
+  return GRN_SUCCESS;
+}
+
+} // extern "C"
diff --git a/storage/mroonga/vendor/groonga/plugins/tokenizers/kytea_sources.am b/storage/mroonga/vendor/groonga/plugins/tokenizers/kytea_sources.am
new file mode 100644
index 00000000000..182f38577e3
--- /dev/null
+++ b/storage/mroonga/vendor/groonga/plugins/tokenizers/kytea_sources.am
@@ -0,0 +1,2 @@
+# Source list of the KyTea tokenizer plugin.
+kytea_la_SOURCES = kytea.cpp
diff --git a/storage/mroonga/vendor/groonga/plugins/tokenizers/mecab.c b/storage/mroonga/vendor/groonga/plugins/tokenizers/mecab.c
new file mode 100644
index 00000000000..4ac99f9a9e8
--- /dev/null
+++ b/storage/mroonga/vendor/groonga/plugins/tokenizers/mecab.c
@@ -0,0 +1,338 @@
+/* -*- c-basic-offset: 2 -*- */
+/* Copyright(C) 2009-2012 Brazil
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License version 2.1 as published by the Free Software Foundation.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+*/
+
+#include <str.h>
+
+#include <groonga.h>
+#include <groonga/tokenizer.h>
+
+#include <mecab.h>
+
+#include <string.h>
+#include <ctype.h>
+
+/* MeCab instance shared by all tokenizations; created in mecab_init(). */
+static mecab_t *sole_mecab = NULL;
+/* Mutex held while sole_mecab is created and while it parses text. */
+static grn_plugin_mutex *sole_mecab_mutex = NULL;
+/* Encoding of sole_mecab's dictionary; set when sole_mecab is created. */
+static grn_encoding sole_mecab_encoding = GRN_ENC_NONE;
+
+/* State of one tokenization, stored in `user_data->ptr' by mecab_init(). */
+typedef struct {
+  /* MeCab instance used for this tokenization (the shared sole_mecab). */
+  mecab_t *mecab;
+  /* Holds a copy of the MeCab output that `next' and `end' point into. */
+  grn_obj buf;
+  /* Start of the text that has not been handed out as tokens yet. */
+  const char *next;
+  /* One past the last byte of the text to tokenize. */
+  const char *end;
+  grn_tokenizer_query *query;
+  grn_tokenizer_token token;
+} grn_mecab_tokenizer;
+
+/*
+  Maps a MeCab dictionary charset name to the matching grn_encoding.
+  Names are compared case-insensitively; an unknown name gives GRN_ENC_NONE.
+ */
+static grn_encoding
+translate_mecab_charset_to_grn_encoding(const char *charset)
+{
+  static const struct {
+    const char *name;
+    grn_encoding encoding;
+  } known_charsets[] = {
+    {"euc-jp",    GRN_ENC_EUC_JP},
+    {"utf-8",     GRN_ENC_UTF8},
+    {"utf8",      GRN_ENC_UTF8},
+    {"shift_jis", GRN_ENC_SJIS},
+    {"shift-jis", GRN_ENC_SJIS},
+    {"sjis",      GRN_ENC_SJIS}
+  };
+  size_t i;
+
+  for (i = 0; i < sizeof(known_charsets) / sizeof(known_charsets[0]); i++) {
+    if (strcasecmp(charset, known_charsets[i].name) == 0) {
+      return known_charsets[i].encoding;
+    }
+  }
+  return GRN_ENC_NONE;
+}
+
+/*
+  Returns the encoding of the dictionary reported by `mecab', or
+  GRN_ENC_NONE when no dictionary information is available.
+ */
+static grn_encoding
+get_mecab_encoding(mecab_t *mecab)
+{
+  const mecab_dictionary_info_t *info = mecab_dictionary_info(mecab);
+
+  if (!info) {
+    return GRN_ENC_NONE;
+  }
+  return translate_mecab_charset_to_grn_encoding(info->charset);
+}
+
+/*
+  This function is called for a full text search query or a document to be
+  indexed. This means that both short/long strings are given.
+  The return value of this function is ignored. When an error occurs in this
+  function, `ctx->rc' is overwritten with an error code (not GRN_SUCCESS).
+
+  It opens the query, creates the shared MeCab instance on first use, checks
+  that the dictionary encoding matches the query encoding and, unless the
+  query carries tokenized delimiters, stores the MeCab output (with trailing
+  whitespace trimmed) as the text that mecab_next() splits into tokens.
+  On success the state is stored in `user_data->ptr'.
+ */
+static grn_obj *
+mecab_init(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data)
+{
+  const char *s;
+  grn_mecab_tokenizer *tokenizer;
+  unsigned int normalizer_flags = 0;
+  grn_tokenizer_query *query;
+  grn_obj *normalized_query;
+  const char *normalized_string;
+  unsigned int normalized_string_length;
+
+  query = grn_tokenizer_query_open(ctx, nargs, args, normalizer_flags);
+  if (!query) {
+    return NULL;
+  }
+  if (!sole_mecab) {
+    grn_plugin_mutex_lock(ctx, sole_mecab_mutex);
+    /* Checked again under the mutex: another caller may have created it. */
+    if (!sole_mecab) {
+      sole_mecab = mecab_new2("-Owakati");
+      if (!sole_mecab) {
+        GRN_PLUGIN_ERROR(ctx, GRN_TOKENIZER_ERROR,
+                         "[tokenizer][mecab] "
+                         "mecab_new2() failed on mecab_init(): %s",
+                         mecab_strerror(NULL));
+      } else {
+        sole_mecab_encoding = get_mecab_encoding(sole_mecab);
+      }
+    }
+    grn_plugin_mutex_unlock(ctx, sole_mecab_mutex);
+  }
+  if (!sole_mecab) {
+    grn_tokenizer_query_close(ctx, query);
+    return NULL;
+  }
+
+  if (query->encoding != sole_mecab_encoding) {
+    grn_tokenizer_query_close(ctx, query);
+    GRN_PLUGIN_ERROR(ctx, GRN_TOKENIZER_ERROR,
+                     "[tokenizer][mecab] "
+                     "MeCab dictionary charset (%s) does not match "
+                     "the table encoding: <%s>",
+                     grn_encoding_to_string(sole_mecab_encoding),
+                     grn_encoding_to_string(query->encoding));
+    return NULL;
+  }
+
+  if (!(tokenizer = GRN_PLUGIN_MALLOC(ctx, sizeof(grn_mecab_tokenizer)))) {
+    grn_tokenizer_query_close(ctx, query);
+    GRN_PLUGIN_ERROR(ctx, GRN_NO_MEMORY_AVAILABLE,
+                     "[tokenizer][mecab] "
+                     "memory allocation to grn_mecab_tokenizer failed");
+    return NULL;
+  }
+  tokenizer->mecab = sole_mecab;
+  tokenizer->query = query;
+
+  normalized_query = query->normalized_query;
+  grn_string_get_normalized(ctx,
+                            normalized_query,
+                            &normalized_string,
+                            &normalized_string_length,
+                            NULL);
+  GRN_TEXT_INIT(&(tokenizer->buf), 0);
+  if (query->have_tokenized_delimiter) {
+    tokenizer->next = normalized_string;
+    tokenizer->end = tokenizer->next + normalized_string_length;
+  } else if (normalized_string_length == 0) {
+    tokenizer->next = "";
+    tokenizer->end = tokenizer->next;
+  } else {
+    grn_plugin_mutex_lock(ctx, sole_mecab_mutex);
+    s = mecab_sparse_tostr2(tokenizer->mecab,
+                            normalized_string,
+                            normalized_string_length);
+    if (!s) {
+      /* %u: the length is an unsigned int. */
+      GRN_PLUGIN_ERROR(ctx, GRN_TOKENIZER_ERROR,
+                       "[tokenizer][mecab] "
+                       "mecab_sparse_tostr() failed len=%u err=%s",
+                       normalized_string_length,
+                       mecab_strerror(tokenizer->mecab));
+    } else {
+      /* Copy the result while the mutex is still held. */
+      GRN_TEXT_PUTS(ctx, &(tokenizer->buf), s);
+    }
+    grn_plugin_mutex_unlock(ctx, sole_mecab_mutex);
+    if (!s) {
+      grn_tokenizer_query_close(ctx, tokenizer->query);
+      GRN_PLUGIN_FREE(ctx, tokenizer);
+      return NULL;
+    }
+    {
+      char *buf, *p;
+      unsigned int bufsize;
+
+      buf = GRN_TEXT_VALUE(&(tokenizer->buf));
+      bufsize = GRN_TEXT_LEN(&(tokenizer->buf));
+      /*
+        A certain version of mecab returns trailing lf or spaces.
+        `bufsize' does not count a terminating NUL, so trimming starts at
+        the very last byte; a final byte that is not whitespace is kept.
+        `p' never moves before `buf', even when the text is empty.
+       */
+      p = buf + bufsize;
+      while (buf < p && isspace(*(unsigned char *)(p - 1))) {
+        p--;
+        *p = '\0';
+      }
+      tokenizer->next = buf;
+      tokenizer->end = p;
+    }
+  }
+  user_data->ptr = tokenizer;
+
+  grn_tokenizer_token_init(ctx, &(tokenizer->token));
+
+  return NULL;
+}
+
+/*
+  This function returns tokens one by one: each call pushes the next token
+  of the tokenization stored in `user_data->ptr'. The return value is
+  always NULL.
+ */
+static grn_obj *
+mecab_next(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data)
+{
+  grn_mecab_tokenizer *tokenizer = user_data->ptr;
+  grn_encoding encoding = tokenizer->query->encoding;
+  const char *token_start;
+  const char *token_end;
+  const char *limit;
+  size_t char_length;
+  grn_tokenizer_status status;
+
+  if (tokenizer->query->have_tokenized_delimiter) {
+    tokenizer->next =
+      grn_tokenizer_tokenized_delimiter_next(ctx,
+                                             &(tokenizer->token),
+                                             tokenizer->next,
+                                             tokenizer->end - tokenizer->next,
+                                             encoding);
+    return NULL;
+  }
+
+  token_start = tokenizer->next;
+  limit = tokenizer->end;
+
+  /* Advance character by character until a space or the end is found. */
+  token_end = token_start;
+  while (token_end < limit) {
+    char_length = grn_charlen_(ctx, token_end, limit, encoding);
+    if (!char_length) {
+      /* No character length available here: give up on the rest. */
+      tokenizer->next = limit;
+      break;
+    }
+    if (grn_isspace(token_end, encoding)) {
+      /* The next token starts after the whole run of spaces. */
+      const char *after_spaces = token_end;
+      while ((char_length = grn_isspace(after_spaces, encoding))) {
+        after_spaces += char_length;
+      }
+      tokenizer->next = after_spaces;
+      break;
+    }
+    token_end += char_length;
+  }
+
+  /* Reaching the end without a break means this is the last token. */
+  status = (token_end == limit) ? GRN_TOKENIZER_LAST : GRN_TOKENIZER_CONTINUE;
+  grn_tokenizer_token_push(ctx, &(tokenizer->token),
+                           token_start, token_end - token_start, status);
+
+  return NULL;
+}
+
+/*
+  This function finalizes a tokenization: it releases the token, the query,
+  the text buffer and the state itself. Nothing is done when
+  `user_data->ptr' is NULL. The return value is always NULL.
+ */
+static grn_obj *
+mecab_fin(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data)
+{
+  grn_mecab_tokenizer *tokenizer = user_data->ptr;
+
+  if (tokenizer) {
+    grn_tokenizer_token_fin(ctx, &(tokenizer->token));
+    grn_tokenizer_query_close(ctx, tokenizer->query);
+    grn_obj_unlink(ctx, &(tokenizer->buf));
+    GRN_PLUGIN_FREE(ctx, tokenizer);
+  }
+  return NULL;
+}
+
+/*
+  Reports an error through GRN_PLUGIN_ERROR() when a temporary MeCab
+  instance cannot be created or when its dictionary encoding differs from
+  the encoding of `ctx'. The check is compiled in only when
+  HAVE_MECAB_DICTIONARY_INFO_T is defined.
+ */
+static void
+check_mecab_dictionary_encoding(grn_ctx *ctx)
+{
+#ifdef HAVE_MECAB_DICTIONARY_INFO_T
+  mecab_t *probe;
+  grn_encoding ctx_encoding;
+  int have_same_encoding_dictionary = 0;
+
+  probe = mecab_new2("-Owakati");
+  if (!probe) {
+    GRN_PLUGIN_ERROR(ctx, GRN_TOKENIZER_ERROR,
+                     "[tokenizer][mecab] "
+                     "mecab_new2 failed in check_mecab_dictionary_encoding: %s",
+                     mecab_strerror(NULL));
+    return;
+  }
+
+  ctx_encoding = GRN_CTX_GET_ENCODING(ctx);
+  have_same_encoding_dictionary = ctx_encoding == get_mecab_encoding(probe);
+  /* The instance was needed only for the comparison above. */
+  mecab_destroy(probe);
+
+  if (!have_same_encoding_dictionary) {
+    GRN_PLUGIN_ERROR(ctx, GRN_TOKENIZER_ERROR,
+                     "[tokenizer][mecab] "
+                     "MeCab has no dictionary that uses the context encoding"
+                     ": <%s>",
+                     grn_encoding_to_string(ctx_encoding));
+  }
+#endif
+}
+
+/*
+  This function initializes a plugin: it opens the mutex and then checks
+  the dictionary encoding. This function fails if there is no dictionary
+  that uses the context encoding of groonga. The result is `ctx->rc'.
+ */
+grn_rc
+GRN_PLUGIN_INIT(grn_ctx *ctx)
+{
+  sole_mecab = NULL;
+  sole_mecab_mutex = grn_plugin_mutex_open(ctx);
+  if (sole_mecab_mutex) {
+    check_mecab_dictionary_encoding(ctx);
+  } else {
+    GRN_PLUGIN_ERROR(ctx, GRN_NO_MEMORY_AVAILABLE,
+                     "[tokenizer][mecab] grn_plugin_mutex_open() failed");
+  }
+
+  return ctx->rc;
+}
+
+/*
+  This function registers a plugin to a database under the name
+  "TokenMecab". After a successful registration it returns GRN_FILE_CORRUPT
+  when the registered object does not have the ID GRN_DB_MECAB.
+ */
+grn_rc
+GRN_PLUGIN_REGISTER(grn_ctx *ctx)
+{
+  grn_obj *token_mecab;
+  grn_rc rc;
+
+  rc = grn_tokenizer_register(ctx, "TokenMecab", 10,
+                              mecab_init, mecab_next, mecab_fin);
+  if (rc != GRN_SUCCESS) {
+    return rc;
+  }
+
+  /* Just for backward compatibility. TokenMecab was built-in not plugin. */
+  token_mecab = grn_ctx_get(ctx, "TokenMecab", 10);
+  if (token_mecab && grn_obj_id(ctx, token_mecab) != GRN_DB_MECAB) {
+    return GRN_FILE_CORRUPT;
+  }
+
+  return rc;
+}
+
+/*
+  This function finalizes a plugin: it destroys the shared MeCab instance
+  and closes the mutex when they exist. It always returns GRN_SUCCESS.
+ */
+grn_rc
+GRN_PLUGIN_FIN(grn_ctx *ctx)
+{
+  if (sole_mecab != NULL) {
+    mecab_destroy(sole_mecab);
+    sole_mecab = NULL;
+  }
+
+  if (sole_mecab_mutex != NULL) {
+    grn_plugin_mutex_close(ctx, sole_mecab_mutex);
+    sole_mecab_mutex = NULL;
+  }
+
+  return GRN_SUCCESS;
+}
diff --git a/storage/mroonga/vendor/groonga/plugins/tokenizers/mecab_sources.am b/storage/mroonga/vendor/groonga/plugins/tokenizers/mecab_sources.am
new file mode 100644
index 00000000000..56912727c3d
--- /dev/null
+++ b/storage/mroonga/vendor/groonga/plugins/tokenizers/mecab_sources.am
@@ -0,0 +1,2 @@
+# Source list of the MeCab tokenizer plugin.
+mecab_la_SOURCES = mecab.c