From 7f3d55508790c928fc57b6a6ada79c8202598a58 Mon Sep 17 00:00:00 2001 From: Kentoku SHIBA Date: Tue, 21 Oct 2014 04:51:38 +0900 Subject: Update Mroonga to the latest version on 2014-10-21T04:51:38+0900 --- storage/mroonga/vendor/groonga/lib/CMakeLists.txt | 1 - storage/mroonga/vendor/groonga/lib/ctx_impl_mrb.c | 2 + storage/mroonga/vendor/groonga/lib/dat.cpp | 9 +- storage/mroonga/vendor/groonga/lib/dat.h | 3 +- storage/mroonga/vendor/groonga/lib/db.c | 303 ++++++++++--- storage/mroonga/vendor/groonga/lib/db.h | 15 +- storage/mroonga/vendor/groonga/lib/ecmascript.c | 292 +++++++------ .../mroonga/vendor/groonga/lib/ecmascript.lemon | 7 + storage/mroonga/vendor/groonga/lib/expr.c | 123 ++++-- storage/mroonga/vendor/groonga/lib/expr.h | 7 +- storage/mroonga/vendor/groonga/lib/groonga_in.h | 3 - storage/mroonga/vendor/groonga/lib/hash.c | 6 + storage/mroonga/vendor/groonga/lib/hash.h | 1 + storage/mroonga/vendor/groonga/lib/ii.c | 204 +++++---- storage/mroonga/vendor/groonga/lib/io.h | 1 + storage/mroonga/vendor/groonga/lib/mrb.h | 6 +- storage/mroonga/vendor/groonga/lib/mrb/mrb_ctx.c | 479 ++++++++++++++++++++- storage/mroonga/vendor/groonga/lib/mrb/mrb_ctx.h | 3 + storage/mroonga/vendor/groonga/lib/mrb/mrb_error.c | 185 ++++++++ storage/mroonga/vendor/groonga/lib/mrb/mrb_error.h | 34 ++ storage/mroonga/vendor/groonga/lib/mrb/mrb_expr.c | 78 ++++ storage/mroonga/vendor/groonga/lib/mrb/mrb_obj.c | 21 + .../vendor/groonga/lib/mrb/scripts/scan_info.rb | 3 + .../groonga/lib/mrb/scripts/scan_info_builder.rb | 106 ++++- .../groonga/lib/mrb/scripts/scan_info_data.rb | 27 ++ storage/mroonga/vendor/groonga/lib/mrb/sources.am | 2 + storage/mroonga/vendor/groonga/lib/output.c | 1 + storage/mroonga/vendor/groonga/lib/pat.c | 5 +- storage/mroonga/vendor/groonga/lib/pat.h | 3 +- storage/mroonga/vendor/groonga/lib/plugin.c | 1 - storage/mroonga/vendor/groonga/lib/proc.c | 317 ++++++++++++-- storage/mroonga/vendor/groonga/lib/snip.c | 2 +- 
storage/mroonga/vendor/groonga/lib/sources.am | 1 + storage/mroonga/vendor/groonga/lib/store.c | 47 +- storage/mroonga/vendor/groonga/lib/str.c | 2 +- storage/mroonga/vendor/groonga/lib/token.c | 287 ++++++++---- storage/mroonga/vendor/groonga/lib/token.h | 30 +- storage/mroonga/vendor/groonga/lib/token_filter.c | 59 +++ storage/mroonga/vendor/groonga/lib/tokenizer.c | 60 ++- storage/mroonga/vendor/groonga/lib/util.c | 7 +- 40 files changed, 2244 insertions(+), 499 deletions(-) create mode 100644 storage/mroonga/vendor/groonga/lib/mrb/mrb_error.c create mode 100644 storage/mroonga/vendor/groonga/lib/mrb/mrb_error.h create mode 100644 storage/mroonga/vendor/groonga/lib/token_filter.c (limited to 'storage/mroonga/vendor/groonga/lib') diff --git a/storage/mroonga/vendor/groonga/lib/CMakeLists.txt b/storage/mroonga/vendor/groonga/lib/CMakeLists.txt index 5be82fa6620..d19c88e4311 100644 --- a/storage/mroonga/vendor/groonga/lib/CMakeLists.txt +++ b/storage/mroonga/vendor/groonga/lib/CMakeLists.txt @@ -50,7 +50,6 @@ else() ${MRUBY_LIBS}) endif() set_target_properties(libgroonga PROPERTIES OUTPUT_NAME "groonga") - if(NOT MRN_GROONGA_BUNDLED) target_link_libraries(libgroonga ${EXECINFO_LIBS} diff --git a/storage/mroonga/vendor/groonga/lib/ctx_impl_mrb.c b/storage/mroonga/vendor/groonga/lib/ctx_impl_mrb.c index 657e95ae4ce..21531e4a837 100644 --- a/storage/mroonga/vendor/groonga/lib/ctx_impl_mrb.c +++ b/storage/mroonga/vendor/groonga/lib/ctx_impl_mrb.c @@ -20,6 +20,7 @@ #include "ctx_impl.h" #include "mrb.h" +#include "mrb/mrb_error.h" #include "mrb/mrb_id.h" #include "mrb/mrb_operator.h" #include "mrb/mrb_ctx.h" @@ -46,6 +47,7 @@ grn_ctx_impl_mrb_init_bindings(grn_ctx *ctx) grn_mrb_load(ctx, "backtrace_entry.rb"); + grn_mrb_error_init(ctx); grn_mrb_id_init(ctx); grn_mrb_operator_init(ctx); grn_mrb_ctx_init(ctx); diff --git a/storage/mroonga/vendor/groonga/lib/dat.cpp b/storage/mroonga/vendor/groonga/lib/dat.cpp index 2d335e1f2f1..8588857cd5d 100644 --- 
a/storage/mroonga/vendor/groonga/lib/dat.cpp +++ b/storage/mroonga/vendor/groonga/lib/dat.cpp @@ -1,5 +1,5 @@ /* -*- c-basic-offset: 2 -*- */ -/* Copyright(C) 2011-2012 Brazil +/* Copyright(C) 2011-2014 Brazil This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public @@ -112,6 +112,8 @@ grn_dat_init(grn_ctx *, grn_dat *dat) dat->trie = NULL; dat->old_trie = NULL; dat->tokenizer = NULL; + dat->normalizer = NULL; + GRN_PTR_INIT(&(dat->token_filters), GRN_OBJ_VECTOR, GRN_ID_NIL); CRITICAL_SECTION_INIT(dat->lock); } @@ -127,6 +129,7 @@ grn_dat_fin(grn_ctx *ctx, grn_dat *dat) grn_io_close(ctx, dat->io); dat->io = NULL; } + GRN_OBJ_FIN(ctx, &(dat->token_filters)); } /* @@ -312,6 +315,7 @@ grn_dat_create(grn_ctx *ctx, const char *path, uint32_t, } dat->encoding = encoding; dat->tokenizer = NULL; + GRN_PTR_INIT(&(dat->token_filters), GRN_OBJ_VECTOR, GRN_ID_NIL); dat->obj.header.flags = dat->header->flags; @@ -354,6 +358,7 @@ grn_dat_open(grn_ctx *ctx, const char *path) } else { dat->normalizer = grn_ctx_at(ctx, dat->header->normalizer); } + GRN_PTR_INIT(&(dat->token_filters), GRN_OBJ_VECTOR, GRN_ID_NIL); dat->obj.header.flags = dat->header->flags; return dat; } @@ -696,7 +701,7 @@ grn_dat_scan(grn_ctx *ctx, grn_dat *dat, const char *str, dat->normalizer, flags); if (!normalized_string) { - fprintf(stderr, "error: grn_string_open() failed!\n"); + *str_rest = str; return -1; } grn_string_get_normalized(ctx, normalized_string, &str, &str_size, NULL); diff --git a/storage/mroonga/vendor/groonga/lib/dat.h b/storage/mroonga/vendor/groonga/lib/dat.h index 00c71df4c3b..d0c44b7035d 100644 --- a/storage/mroonga/vendor/groonga/lib/dat.h +++ b/storage/mroonga/vendor/groonga/lib/dat.h @@ -1,5 +1,5 @@ /* -*- c-basic-offset: 2 -*- */ -/* Copyright(C) 2011 Brazil +/* Copyright(C) 2011-2014 Brazil This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public @@ -37,6 
+37,7 @@ struct _grn_dat { void *old_trie; grn_obj *tokenizer; grn_obj *normalizer; + grn_obj token_filters; grn_critical_section lock; }; diff --git a/storage/mroonga/vendor/groonga/lib/db.c b/storage/mroonga/vendor/groonga/lib/db.c index 28ec256c036..f948dc7353a 100644 --- a/storage/mroonga/vendor/groonga/lib/db.c +++ b/storage/mroonga/vendor/groonga/lib/db.c @@ -301,12 +301,20 @@ grn_db_close(grn_ctx *ctx, grn_obj *db) grn_bool ctx_used_db; if (!s) { return GRN_INVALID_ARGUMENT; } GRN_API_ENTER; + ctx_used_db = ctx->impl && ctx->impl->db == db; if (ctx_used_db) { grn_ctx_loader_clear(ctx); if (ctx->impl->parser) { grn_expr_parser_close(ctx); } + } + + GRN_TINY_ARRAY_EACH(&s->values, 1, grn_db_curr_id(ctx, db), id, vp, { + if (vp->ptr) { grn_obj_close(ctx, vp->ptr); } + }); + + if (ctx_used_db) { if (ctx->impl->values) { grn_db_obj *o; GRN_ARRAY_EACH(ctx, ctx->impl->values, 0, 0, id, &o, { @@ -315,9 +323,7 @@ grn_db_close(grn_ctx *ctx, grn_obj *db) grn_array_truncate(ctx, ctx->impl->values); } } - GRN_TINY_ARRAY_EACH(&s->values, 1, grn_db_curr_id(ctx, db), id, vp, { - if (vp->ptr) { grn_obj_close(ctx, vp->ptr); } - }); + /* grn_tiny_array_fin should be refined.. 
*/ #ifdef WIN32 { @@ -326,6 +332,7 @@ grn_db_close(grn_ctx *ctx, grn_obj *db) } #endif grn_tiny_array_fin(&s->values); + switch (s->keys->header.type) { case GRN_TABLE_PAT_KEY : grn_pat_close(ctx, (grn_pat *)s->keys); @@ -337,6 +344,7 @@ grn_db_close(grn_ctx *ctx, grn_obj *db) CRITICAL_SECTION_FIN(s->lock); if (s->specs) { grn_ja_close(ctx, s->specs); } GRN_FREE(s); + if (ctx_used_db) { grn_cache *cache; cache = grn_cache_current_get(ctx); @@ -345,6 +353,7 @@ grn_db_close(grn_ctx *ctx, grn_obj *db) } ctx->impl->db = NULL; } + GRN_API_RETURN(GRN_SUCCESS); } @@ -1472,11 +1481,11 @@ call_delete_hook(grn_ctx *ctx, grn_obj *table, grn_id rid, const void *key, unsi GRN_TEXT_SET_REF(&oldvalue_, key, key_size); GRN_UINT32_SET(ctx, &id_, rid); GRN_UINT32_SET(ctx, &flags_, GRN_OBJ_SET); - grn_ctx_push(ctx, &id_); - grn_ctx_push(ctx, &oldvalue_); - grn_ctx_push(ctx, &value_); - grn_ctx_push(ctx, &flags_); while (hooks) { + grn_ctx_push(ctx, &id_); + grn_ctx_push(ctx, &oldvalue_); + grn_ctx_push(ctx, &value_); + grn_ctx_push(ctx, &flags_); pctx.caller = NULL; pctx.currh = hooks; if (hooks->proc) { @@ -1845,6 +1854,7 @@ grn_table_truncate(grn_ctx *ctx, grn_obj *table) grn_hash *cols; grn_obj *tokenizer; grn_obj *normalizer; + grn_obj token_filters; if ((cols = grn_hash_create(ctx, NULL, sizeof(grn_id), 0, GRN_OBJ_TABLE_HASH_KEY|GRN_HASH_TINY))) { if (grn_table_columns(ctx, table, "", 0, (grn_obj *)cols)) { @@ -1856,7 +1866,9 @@ grn_table_truncate(grn_ctx *ctx, grn_obj *table) } grn_hash_close(ctx, cols); } - grn_table_get_info(ctx, table, NULL, NULL, &tokenizer, &normalizer); + grn_table_get_info(ctx, table, NULL, NULL, &tokenizer, &normalizer, NULL); + GRN_PTR_INIT(&token_filters, GRN_OBJ_VECTOR, GRN_ID_NIL); + grn_obj_get_info(ctx, table, GRN_INFO_TOKEN_FILTERS, &token_filters); switch (table->header.type) { case GRN_TABLE_PAT_KEY : for (hooks = DB_OBJ(table)->hooks[GRN_HOOK_INSERT]; hooks; hooks = hooks->next) { @@ -1891,6 +1903,8 @@ grn_table_truncate(grn_ctx *ctx, grn_obj 
*table) } grn_obj_set_info(ctx, table, GRN_INFO_DEFAULT_TOKENIZER, tokenizer); grn_obj_set_info(ctx, table, GRN_INFO_NORMALIZER, normalizer); + grn_obj_set_info(ctx, table, GRN_INFO_TOKEN_FILTERS, &token_filters); + GRN_OBJ_FIN(ctx, &token_filters); grn_obj_touch(ctx, table, NULL); } exit : @@ -1900,7 +1914,8 @@ exit : grn_rc grn_table_get_info(grn_ctx *ctx, grn_obj *table, grn_obj_flags *flags, grn_encoding *encoding, grn_obj **tokenizer, - grn_obj **normalizer) + grn_obj **normalizer, + grn_obj **token_filters) { grn_rc rc = GRN_INVALID_ARGUMENT; GRN_API_ENTER; @@ -1911,6 +1926,7 @@ grn_table_get_info(grn_ctx *ctx, grn_obj *table, grn_obj_flags *flags, if (encoding) { *encoding = ((grn_pat *)table)->encoding; } if (tokenizer) { *tokenizer = ((grn_pat *)table)->tokenizer; } if (normalizer) { *normalizer = ((grn_pat *)table)->normalizer; } + if (token_filters) { *token_filters = &(((grn_pat *)table)->token_filters); } rc = GRN_SUCCESS; break; case GRN_TABLE_DAT_KEY : @@ -1918,6 +1934,7 @@ grn_table_get_info(grn_ctx *ctx, grn_obj *table, grn_obj_flags *flags, if (encoding) { *encoding = ((grn_dat *)table)->encoding; } if (tokenizer) { *tokenizer = ((grn_dat *)table)->tokenizer; } if (normalizer) { *normalizer = ((grn_dat *)table)->normalizer; } + if (token_filters) { *token_filters = &(((grn_dat *)table)->token_filters); } rc = GRN_SUCCESS; break; case GRN_TABLE_HASH_KEY : @@ -1925,6 +1942,7 @@ grn_table_get_info(grn_ctx *ctx, grn_obj *table, grn_obj_flags *flags, if (encoding) { *encoding = ((grn_hash *)table)->encoding; } if (tokenizer) { *tokenizer = ((grn_hash *)table)->tokenizer; } if (normalizer) { *normalizer = ((grn_hash *)table)->normalizer; } + if (token_filters) { *token_filters = &(((grn_hash *)table)->token_filters); } rc = GRN_SUCCESS; break; case GRN_TABLE_NO_KEY : @@ -1932,6 +1950,7 @@ grn_table_get_info(grn_ctx *ctx, grn_obj *table, grn_obj_flags *flags, if (encoding) { *encoding = GRN_ENC_NONE; } if (tokenizer) { *tokenizer = grn_token_uvector; } 
if (normalizer) { *normalizer = NULL; } + if (token_filters) { *token_filters = NULL; } rc = GRN_SUCCESS; break; } @@ -3515,7 +3534,6 @@ grn_column_create(grn_ctx *ctx, grn_obj *table, grn_id domain = GRN_ID_NIL; char fullname[GRN_TABLE_MAX_KEY_SIZE]; char buffer[PATH_MAX]; - grn_bool ja_p = GRN_FALSE; GRN_API_ENTER; if (!table) { ERR(GRN_INVALID_ARGUMENT, "[column][create] table is missing"); @@ -3635,14 +3653,12 @@ grn_column_create(grn_ctx *ctx, grn_obj *table, case GRN_OBJ_COLUMN_SCALAR : if ((flags & GRN_OBJ_KEY_VAR_SIZE) || value_size > sizeof(int64_t)) { res = (grn_obj *)grn_ja_create(ctx, path, value_size, flags); - ja_p = GRN_TRUE; } else { res = (grn_obj *)grn_ra_create(ctx, path, value_size); } break; case GRN_OBJ_COLUMN_VECTOR : res = (grn_obj *)grn_ja_create(ctx, path, value_size * 30/*todo*/, flags); - ja_p = GRN_TRUE; //todo : zlib support break; case GRN_OBJ_COLUMN_INDEX : @@ -3655,31 +3671,6 @@ grn_column_create(grn_ctx *ctx, grn_obj *table, DB_OBJ(res)->range = range; DB_OBJ(res)->header.flags = flags; res->header.flags = flags; - if (ja_p) { - grn_bool zlib_p = GRN_FALSE; - grn_bool lzo_p = GRN_FALSE; -#ifdef GRN_WITH_ZLIB - if (flags & GRN_OBJ_COMPRESS_ZLIB) { - zlib_p = GRN_TRUE; - } -#endif /* GRN_WITH_ZLIB */ -#ifdef GRN_WITH_LZO - if (flags & GRN_OBJ_COMPRESS_LZO) { - lzo_p = GRN_TRUE; - } -#endif /* GRN_WITH_LZO */ - if (zlib_p || lzo_p) { - int table_name_len; - char table_name[GRN_TABLE_MAX_KEY_SIZE]; - table_name_len = grn_obj_name(ctx, table, table_name, - GRN_TABLE_MAX_KEY_SIZE); - GRN_LOG(ctx, GRN_LOG_WARNING, - "[column][create] " - "%s compressed column will leaks memories: <%.*s>.<%.*s>", - zlib_p ? 
"zlib" : "lzo", - table_name_len, table_name, name_size, name); - } - } if (grn_db_obj_init(ctx, db, id, DB_OBJ(res))) { _grn_obj_remove(ctx, res); res = NULL; @@ -5676,15 +5667,15 @@ grn_obj_set_value_column_var_size_vector(grn_ctx *ctx, grn_obj *obj, grn_id id, case GRN_BULK : { unsigned int token_flags = 0; - grn_token *token; + grn_token_cursor *token_cursor; if (v && s && - (token = grn_token_open(ctx, lexicon, v, s, - GRN_TOKEN_ADD, token_flags))) { - while (!token->status) { - grn_id tid = grn_token_next(ctx, token); + (token_cursor = grn_token_cursor_open(ctx, lexicon, v, s, + GRN_TOKEN_ADD, token_flags))) { + while (token_cursor->status == GRN_TOKEN_DOING) { + grn_id tid = grn_token_cursor_next(ctx, token_cursor); grn_uvector_add_element(ctx, &uvector, tid, 0); } - grn_token_close(ctx, token); + grn_token_cursor_close(ctx, token_cursor); } rc = grn_ja_put(ctx, (grn_ja *)obj, id, GRN_BULK_HEAD(&uvector), GRN_BULK_VSIZE(&uvector), @@ -6289,6 +6280,42 @@ grn_obj_get_info(grn_ctx *ctx, grn_obj *obj, grn_info_type type, grn_obj *valueb break; } break; + case GRN_INFO_TOKEN_FILTERS : + if (!valuebuf) { + if (!(valuebuf = grn_obj_open(ctx, GRN_PVECTOR, 0, 0))) { + ERR(GRN_NO_MEMORY_AVAILABLE, + "grn_obj_get_info: failed to allocate value buffer"); + goto exit; + } + } + { + grn_obj *token_filters = NULL; + switch (obj->header.type) { + case GRN_TABLE_HASH_KEY : + token_filters = &(((grn_hash *)obj)->token_filters); + break; + case GRN_TABLE_PAT_KEY : + token_filters = &(((grn_pat *)obj)->token_filters); + break; + case GRN_TABLE_DAT_KEY : + token_filters = &(((grn_dat *)obj)->token_filters); + break; + default : + ERR(GRN_INVALID_ARGUMENT, + /* TODO: Show type name instead of type ID */ + "[info][get][token-filters] target object must be one of " + "GRN_TABLE_HASH_KEY, GRN_TABLE_PAT_KEY and GRN_TABLE_DAT_KEY: %d", + obj->header.type); + break; + } + if (token_filters) { + grn_bulk_write(ctx, + valuebuf, + GRN_BULK_HEAD(token_filters), + 
GRN_BULK_VSIZE(token_filters)); + } + } + break; default : /* todo */ break; @@ -6311,7 +6338,7 @@ build_index(grn_ctx *ctx, grn_obj *obj) grn_obj_flags flags; grn_ii *ii = (grn_ii *)obj; grn_bool use_grn_ii_build; - grn_table_get_info(ctx, ii->lexicon, &flags, NULL, NULL, NULL); + grn_table_get_info(ctx, ii->lexicon, &flags, NULL, NULL, NULL, NULL); switch (flags & GRN_OBJ_TABLE_TYPE_MASK) { case GRN_OBJ_TABLE_PAT_KEY : case GRN_OBJ_TABLE_DAT_KEY : @@ -6519,6 +6546,23 @@ grn_hook_unpack(grn_ctx *ctx, grn_db_obj *obj, const char *buf, uint32_t buf_siz return GRN_SUCCESS; } +static void +grn_token_filters_pack(grn_ctx *ctx, + grn_obj *token_filters, + grn_obj *buffer) +{ + unsigned int i, n_token_filters; + + n_token_filters = GRN_BULK_VSIZE(token_filters) / sizeof(grn_obj *); + for (i = 0; i < n_token_filters; i++) { + grn_obj *token_filter = GRN_PTR_VALUE_AT(token_filters, i); + grn_id token_filter_id; + + token_filter_id = grn_obj_id(ctx, token_filter); + GRN_RECORD_PUT(ctx, buffer, token_filter_id); + } +} + void grn_obj_spec_save(grn_ctx *ctx, grn_db_obj *obj) { @@ -6543,6 +6587,18 @@ grn_obj_spec_save(grn_ctx *ctx, grn_db_obj *obj) grn_hook_pack(ctx, obj, b); grn_vector_delimit(ctx, &v, 0, 0); switch (obj->header.type) { + case GRN_TABLE_HASH_KEY : + grn_token_filters_pack(ctx, &(((grn_hash *)obj)->token_filters), b); + grn_vector_delimit(ctx, &v, 0, 0); + break; + case GRN_TABLE_PAT_KEY : + grn_token_filters_pack(ctx, &(((grn_pat *)obj)->token_filters), b); + grn_vector_delimit(ctx, &v, 0, 0); + break; + case GRN_TABLE_DAT_KEY : + grn_token_filters_pack(ctx, &(((grn_dat *)obj)->token_filters), b); + grn_vector_delimit(ctx, &v, 0, 0); + break; case GRN_EXPR : grn_expr_pack(ctx, b, (grn_obj *)obj); grn_vector_delimit(ctx, &v, 0, 0); @@ -6748,6 +6804,62 @@ grn_obj_set_info_source(grn_ctx *ctx, grn_obj *obj, grn_obj *value) return rc; } +static grn_rc +grn_obj_set_info_token_filters(grn_ctx *ctx, + grn_obj *table, + grn_obj *token_filters) +{ + grn_obj 
*current_token_filters; + unsigned int i, n_token_filters; + grn_obj token_filter_names; + + switch (table->header.type) { + case GRN_TABLE_HASH_KEY : + current_token_filters = &(((grn_hash *)table)->token_filters); + break; + case GRN_TABLE_PAT_KEY : + current_token_filters = &(((grn_pat *)table)->token_filters); + break; + case GRN_TABLE_DAT_KEY : + current_token_filters = &(((grn_dat *)table)->token_filters); + break; + default : + /* TODO: Show type name instead of type ID */ + ERR(GRN_INVALID_ARGUMENT, + "[info][set][token-filters] target object must be one of " + "GRN_TABLE_HASH_KEY, GRN_TABLE_PAT_KEY and GRN_TABLE_DAT_KEY: %d", + table->header.type); + return ctx->rc; + } + + GRN_TEXT_INIT(&token_filter_names, 0); + GRN_BULK_REWIND(current_token_filters); + n_token_filters = GRN_BULK_VSIZE(token_filters) / sizeof(grn_obj *); + for (i = 0; i < n_token_filters; i++) { + grn_obj *token_filter = GRN_PTR_VALUE_AT(token_filters, i); + char token_filter_name[GRN_TABLE_MAX_KEY_SIZE]; + unsigned int token_filter_name_size; + + GRN_PTR_PUT(ctx, current_token_filters, token_filter); + + token_filter_name_size = grn_obj_name(ctx, + token_filter, + token_filter_name, + GRN_TABLE_MAX_KEY_SIZE); + GRN_TEXT_PUT(ctx, + &token_filter_names, + token_filter_name, + token_filter_name_size); + } + GRN_LOG(ctx, GRN_LOG_NOTICE, "DDL:set_token_filters %.*s", + (int)GRN_BULK_VSIZE(&token_filter_names), + GRN_BULK_HEAD(&token_filter_names)); + GRN_OBJ_FIN(ctx, &token_filter_names); + grn_obj_spec_save(ctx, DB_OBJ(table)); + + return GRN_SUCCESS; +} + grn_rc grn_obj_set_info(grn_ctx *ctx, grn_obj *obj, grn_info_type type, grn_obj *value) { @@ -6807,6 +6919,9 @@ grn_obj_set_info(grn_ctx *ctx, grn_obj *obj, grn_info_type type, grn_obj *value) } } break; + case GRN_INFO_TOKEN_FILTERS : + rc = grn_obj_set_info_token_filters(ctx, obj, value); + break; default : /* todo */ break; @@ -7702,10 +7817,22 @@ grn_db_obj_init(grn_ctx *ctx, grn_obj *db, grn_id id, grn_db_obj *obj) return rc; } 
+#define SERIALIZED_SPEC_INDEX_SPEC 0 +#define SERIALIZED_SPEC_INDEX_PATH 1 +#define SERIALIZED_SPEC_INDEX_SOURCE 2 +#define SERIALIZED_SPEC_INDEX_HOOK 3 +#define SERIALIZED_SPEC_INDEX_TOKEN_FILTERS 4 +#define SERIALIZED_SPEC_INDEX_EXPR 4 + #define GET_PATH(spec,buffer,s,id) do {\ if (spec->header.flags & GRN_OBJ_CUSTOM_NAME) {\ const char *path;\ - unsigned int size = grn_vector_get_element(ctx, &v, 1, &path, NULL, NULL); \ + unsigned int size = grn_vector_get_element(ctx,\ + &v,\ + SERIALIZED_SPEC_INDEX_PATH,\ + &path,\ + NULL,\ + NULL);\ if (size > PATH_MAX) { ERR(GRN_FILENAME_TOO_LONG, "too long path"); }\ memcpy(buffer, path, size);\ buffer[size] = '\0';\ @@ -7721,18 +7848,62 @@ grn_db_obj_init(grn_ctx *ctx, grn_obj *db, grn_id id, grn_db_obj *obj) r->id = id;\ r->range = spec->range;\ r->db = (grn_obj *)s;\ - size = grn_vector_get_element(ctx, &v, 2, &p, NULL, NULL);\ + size = grn_vector_get_element(ctx,\ + &v,\ + SERIALIZED_SPEC_INDEX_SOURCE,\ + &p,\ + NULL,\ + NULL);\ if (size) {\ if ((r->source = GRN_MALLOC(size))) {\ memcpy(r->source, p, size);\ r->source_size = size;\ }\ }\ - size = grn_vector_get_element(ctx, &v, 3, &p, NULL, NULL);\ + size = grn_vector_get_element(ctx,\ + &v,\ + SERIALIZED_SPEC_INDEX_HOOK,\ + &p,\ + NULL,\ + NULL);\ grn_hook_unpack(ctx, r, p, size);\ }\ } while (0) +static void +grn_token_filters_unpack(grn_ctx *ctx, + grn_obj *token_filters, + grn_obj *spec_vector) +{ + grn_id *token_filter_ids; + unsigned int element_size; + unsigned int i, n_token_filter_ids; + + if (grn_vector_size(ctx, spec_vector) < SERIALIZED_SPEC_INDEX_TOKEN_FILTERS) { + return; + } + + element_size = grn_vector_get_element(ctx, + spec_vector, + SERIALIZED_SPEC_INDEX_TOKEN_FILTERS, + (const char **)(&token_filter_ids), + NULL, + NULL); + n_token_filter_ids = element_size / sizeof(grn_id); + for (i = 0; i < n_token_filter_ids; i++) { + grn_id token_filter_id = token_filter_ids[i]; + grn_obj *token_filter; + + token_filter = grn_ctx_at(ctx, token_filter_id); + if 
(!token_filter) { + ERR(GRN_INVALID_ARGUMENT, + "nonexistent token filter ID: %d", token_filter_id); + return; + } + GRN_PTR_PUT(ctx, token_filters, token_filter); + } +} + grn_obj * grn_ctx_at(grn_ctx *ctx, grn_id id) { @@ -7792,7 +7963,12 @@ grn_ctx_at(grn_ctx *ctx, grn_id id) uint32_t size; grn_obj_spec *spec; char buffer[PATH_MAX]; - size = grn_vector_get_element(ctx, &v, 0, (const char **)&spec, NULL, NULL); + size = grn_vector_get_element(ctx, + &v, + SERIALIZED_SPEC_INDEX_SPEC, + (const char **)&spec, + NULL, + NULL); if (size) { switch (spec->header.type) { case GRN_TYPE : @@ -7803,27 +7979,33 @@ grn_ctx_at(grn_ctx *ctx, grn_id id) GET_PATH(spec, buffer, s, id); vp->ptr = (grn_obj *)grn_hash_open(ctx, buffer); if (vp->ptr) { + grn_hash *hash = (grn_hash *)(vp->ptr); grn_obj_flags flags = vp->ptr->header.flags; UNPACK_INFO(); vp->ptr->header.flags = flags; + grn_token_filters_unpack(ctx, &(hash->token_filters), &v); } break; case GRN_TABLE_PAT_KEY : GET_PATH(spec, buffer, s, id); vp->ptr = (grn_obj *)grn_pat_open(ctx, buffer); if (vp->ptr) { + grn_pat *pat = (grn_pat *)(vp->ptr); grn_obj_flags flags = vp->ptr->header.flags; UNPACK_INFO(); vp->ptr->header.flags = flags; + grn_token_filters_unpack(ctx, &(pat->token_filters), &v); } break; case GRN_TABLE_DAT_KEY : GET_PATH(spec, buffer, s, id); vp->ptr = (grn_obj *)grn_dat_open(ctx, buffer); if (vp->ptr) { + grn_dat *dat = (grn_dat *)(vp->ptr); grn_obj_flags flags = vp->ptr->header.flags; UNPACK_INFO(); vp->ptr->header.flags = flags; + grn_token_filters_unpack(ctx, &(dat->token_filters), &v); } break; case GRN_TABLE_NO_KEY : @@ -7856,7 +8038,12 @@ grn_ctx_at(grn_ctx *ctx, grn_id id) case GRN_EXPR : { uint8_t *u; - size = grn_vector_get_element(ctx, &v, 4, &p, NULL, NULL); + size = grn_vector_get_element(ctx, + &v, + SERIALIZED_SPEC_INDEX_EXPR, + &p, + NULL, + NULL); u = (uint8_t *)p; vp->ptr = grn_expr_open(ctx, spec, u, u + size); } @@ -9050,7 +9237,7 @@ grn_column_index_column_equal(grn_ctx *ctx, grn_obj 
*obj, grn_operator op, if (obj->header.type != GRN_COLUMN_FIX_SIZE) { grn_obj *tokenizer, *lexicon = grn_ctx_at(ctx, target->header.domain); if (!lexicon) { continue; } - grn_table_get_info(ctx, lexicon, NULL, NULL, &tokenizer, NULL); + grn_table_get_info(ctx, lexicon, NULL, NULL, &tokenizer, NULL, NULL); if (tokenizer) { continue; } } if (n < buf_size) { @@ -9115,7 +9302,7 @@ grn_column_index_column_range(grn_ctx *ctx, grn_obj *obj, grn_operator op, if (!lexicon) { continue; } if (lexicon->header.type != GRN_TABLE_PAT_KEY) { continue; } /* FIXME: GRN_TABLE_DAT_KEY should be supported */ - grn_table_get_info(ctx, lexicon, NULL, NULL, &tokenizer, NULL); + grn_table_get_info(ctx, lexicon, NULL, NULL, &tokenizer, NULL, NULL); if (tokenizer) { continue; } } if (n < buf_size) { @@ -9147,7 +9334,7 @@ is_valid_range_index(grn_ctx *ctx, grn_obj *index_column) return GRN_FALSE; } - grn_table_get_info(ctx, lexicon, NULL, NULL, &tokenizer, NULL); + grn_table_get_info(ctx, lexicon, NULL, NULL, &tokenizer, NULL, NULL); grn_obj_unlink(ctx, lexicon); if (tokenizer) { return GRN_FALSE; } @@ -9644,10 +9831,10 @@ grn_table_tokenize(grn_ctx *ctx, grn_obj *table, const char *str, unsigned int str_len, grn_obj *buf, grn_bool addp) { - grn_token *token = NULL; + grn_token_cursor *token_cursor = NULL; grn_token_mode mode = addp ? 
GRN_TOKEN_ADD : GRN_TOKEN_GET; GRN_API_ENTER; - if (!(token = grn_token_open(ctx, table, str, str_len, mode, 0))) { + if (!(token_cursor = grn_token_cursor_open(ctx, table, str, str_len, mode, 0))) { goto exit; } if (buf) { @@ -9657,15 +9844,15 @@ grn_table_tokenize(grn_ctx *ctx, grn_obj *table, goto exit; } } - while (token->status != GRN_TOKEN_DONE) { + while (token_cursor->status != GRN_TOKEN_DONE && token_cursor->status != GRN_TOKEN_DONE_SKIP) { grn_id tid; - if ((tid = grn_token_next(ctx, token))) { + if ((tid = grn_token_cursor_next(ctx, token_cursor))) { GRN_RECORD_PUT(ctx, buf, tid); } } exit : - if (token) { - grn_token_close(ctx, token); + if (token_cursor) { + grn_token_cursor_close(ctx, token_cursor); } GRN_API_RETURN(buf); } diff --git a/storage/mroonga/vendor/groonga/lib/db.h b/storage/mroonga/vendor/groonga/lib/db.h index d48a625ef44..f2f95560b08 100644 --- a/storage/mroonga/vendor/groonga/lib/db.h +++ b/storage/mroonga/vendor/groonga/lib/db.h @@ -29,6 +29,8 @@ #include "store.h" #endif /* GRN_STORE_H */ +#include <groonga/token_filter.h> + #ifdef __cplusplus extern "C" { #endif @@ -96,7 +98,8 @@ grn_id grn_table_add_v(grn_ctx *ctx, grn_obj *table, const void *key, int key_si void **value, int *added); GRN_API grn_rc grn_table_get_info(grn_ctx *ctx, grn_obj *table, grn_obj_flags *flags, grn_encoding *encoding, grn_obj **tokenizer, - grn_obj **normalizer); + grn_obj **normalizer, + grn_obj **token_filters); const char *_grn_table_key(grn_ctx *ctx, grn_obj *table, grn_id id, uint32_t *key_size); grn_rc grn_table_search(grn_ctx *ctx, grn_obj *table, @@ -184,6 +187,16 @@ struct _grn_proc { grn_selector_func *selector; + union { + struct { + grn_token_filter_init_func *init; + grn_token_filter_filter_func *filter; + grn_token_filter_fin_func *fin; + } token_filter; + } callbacks; + + void *user_data; + grn_id module; // uint32_t nargs; // uint32_t nresults; diff --git a/storage/mroonga/vendor/groonga/lib/ecmascript.c b/storage/mroonga/vendor/groonga/lib/ecmascript.c index 
fd74f95b815..e973f1281a4 100644 --- a/storage/mroonga/vendor/groonga/lib/ecmascript.c +++ b/storage/mroonga/vendor/groonga/lib/ecmascript.c @@ -4,7 +4,7 @@ /* First off, code is included that follows the "include" declaration ** in the input grammar file. */ #include <stdio.h> -#line 3 "ecmascript.lemon" +#line 4 "ecmascript.lemon" #define assert GRN_ASSERT #line 11 "ecmascript.c" @@ -867,7 +867,7 @@ static void yy_destructor( */ case 74: /* suppress_unused_variable_warning */ { -#line 10 "ecmascript.lemon" +#line 11 "ecmascript.lemon" (void)efsi; @@ -1295,7 +1295,7 @@ static void yy_reduce( ** break; */ case 5: /* query ::= query query_element */ -#line 45 "ecmascript.lemon" +#line 46 "ecmascript.lemon" { grn_expr_append_op(efsi->ctx, efsi->e, grn_int32_value_at(&efsi->op_stack, -1), 2); } @@ -1303,7 +1303,7 @@ static void yy_reduce( break; case 6: /* query ::= query LOGICAL_AND query_element */ case 35: /* logical_and_expression ::= logical_and_expression LOGICAL_AND bitwise_or_expression */ yytestcase(yyruleno==35); -#line 48 "ecmascript.lemon" +#line 49 "ecmascript.lemon" { grn_expr_append_op(efsi->ctx, efsi->e, GRN_OP_AND, 2); } @@ -1311,7 +1311,7 @@ static void yy_reduce( break; case 7: /* query ::= query LOGICAL_AND_NOT query_element */ case 36: /* logical_and_expression ::= logical_and_expression LOGICAL_AND_NOT bitwise_or_expression */ yytestcase(yyruleno==36); -#line 51 "ecmascript.lemon" +#line 52 "ecmascript.lemon" { grn_expr_append_op(efsi->ctx, efsi->e, GRN_OP_AND_NOT, 2); } @@ -1319,14 +1319,14 @@ static void yy_reduce( break; case 8: /* query ::= query LOGICAL_OR query_element */ case 33: /* logical_or_expression ::= logical_or_expression LOGICAL_OR logical_and_expression */ yytestcase(yyruleno==33); -#line 54 "ecmascript.lemon" +#line 55 "ecmascript.lemon" { grn_expr_append_op(efsi->ctx, efsi->e, GRN_OP_OR, 2); } #line 1327 "ecmascript.c" break; case 11: /* query_element ::= RELATIVE_OP query_element */ -#line 61 "ecmascript.lemon" +#line 62 
"ecmascript.lemon" { int mode; GRN_INT32_POP(&efsi->mode_stack, mode); @@ -1334,7 +1334,7 @@ static void yy_reduce( #line 1335 "ecmascript.c" break; case 12: /* query_element ::= IDENTIFIER RELATIVE_OP query_element */ -#line 65 "ecmascript.lemon" +#line 66 "ecmascript.lemon" { int mode; grn_obj *c; @@ -1348,314 +1348,320 @@ static void yy_reduce( GRN_INT32_POP(&efsi->max_interval_stack, max_interval); } break; + case GRN_OP_SIMILAR : + { + int similarity_threshold; + GRN_INT32_POP(&efsi->similarity_threshold_stack, similarity_threshold); + } + break; default : break; } } -#line 1356 "ecmascript.c" +#line 1362 "ecmascript.c" break; case 13: /* query_element ::= BRACEL expression BRACER */ case 14: /* query_element ::= EVAL primary_expression */ yytestcase(yyruleno==14); -#line 82 "ecmascript.lemon" +#line 89 "ecmascript.lemon" { efsi->flags = efsi->default_flags; } -#line 1364 "ecmascript.c" +#line 1370 "ecmascript.c" break; case 16: /* expression ::= expression COMMA assignment_expression */ -#line 90 "ecmascript.lemon" +#line 97 "ecmascript.lemon" { grn_expr_append_op(efsi->ctx, efsi->e, GRN_OP_COMMA, 2); } -#line 1371 "ecmascript.c" +#line 1377 "ecmascript.c" break; case 18: /* assignment_expression ::= lefthand_side_expression ASSIGN assignment_expression */ -#line 95 "ecmascript.lemon" +#line 102 "ecmascript.lemon" { grn_expr_append_op(efsi->ctx, efsi->e, GRN_OP_ASSIGN, 2); } -#line 1378 "ecmascript.c" +#line 1384 "ecmascript.c" break; case 19: /* assignment_expression ::= lefthand_side_expression STAR_ASSIGN assignment_expression */ -#line 98 "ecmascript.lemon" +#line 105 "ecmascript.lemon" { grn_expr_append_op(efsi->ctx, efsi->e, GRN_OP_STAR_ASSIGN, 2); } -#line 1385 "ecmascript.c" +#line 1391 "ecmascript.c" break; case 20: /* assignment_expression ::= lefthand_side_expression SLASH_ASSIGN assignment_expression */ -#line 101 "ecmascript.lemon" +#line 108 "ecmascript.lemon" { grn_expr_append_op(efsi->ctx, efsi->e, GRN_OP_SLASH_ASSIGN, 2); } -#line 1392 
"ecmascript.c" +#line 1398 "ecmascript.c" break; case 21: /* assignment_expression ::= lefthand_side_expression MOD_ASSIGN assignment_expression */ -#line 104 "ecmascript.lemon" +#line 111 "ecmascript.lemon" { grn_expr_append_op(efsi->ctx, efsi->e, GRN_OP_MOD_ASSIGN, 2); } -#line 1399 "ecmascript.c" +#line 1405 "ecmascript.c" break; case 22: /* assignment_expression ::= lefthand_side_expression PLUS_ASSIGN assignment_expression */ -#line 107 "ecmascript.lemon" +#line 114 "ecmascript.lemon" { grn_expr_append_op(efsi->ctx, efsi->e, GRN_OP_PLUS_ASSIGN, 2); } -#line 1406 "ecmascript.c" +#line 1412 "ecmascript.c" break; case 23: /* assignment_expression ::= lefthand_side_expression MINUS_ASSIGN assignment_expression */ -#line 110 "ecmascript.lemon" +#line 117 "ecmascript.lemon" { grn_expr_append_op(efsi->ctx, efsi->e, GRN_OP_MINUS_ASSIGN, 2); } -#line 1413 "ecmascript.c" +#line 1419 "ecmascript.c" break; case 24: /* assignment_expression ::= lefthand_side_expression SHIFTL_ASSIGN assignment_expression */ -#line 113 "ecmascript.lemon" +#line 120 "ecmascript.lemon" { grn_expr_append_op(efsi->ctx, efsi->e, GRN_OP_SHIFTL_ASSIGN, 2); } -#line 1420 "ecmascript.c" +#line 1426 "ecmascript.c" break; case 25: /* assignment_expression ::= lefthand_side_expression SHIFTR_ASSIGN assignment_expression */ -#line 116 "ecmascript.lemon" +#line 123 "ecmascript.lemon" { grn_expr_append_op(efsi->ctx, efsi->e, GRN_OP_SHIFTR_ASSIGN, 2); } -#line 1427 "ecmascript.c" +#line 1433 "ecmascript.c" break; case 26: /* assignment_expression ::= lefthand_side_expression SHIFTRR_ASSIGN assignment_expression */ -#line 119 "ecmascript.lemon" +#line 126 "ecmascript.lemon" { grn_expr_append_op(efsi->ctx, efsi->e, GRN_OP_SHIFTRR_ASSIGN, 2); } -#line 1434 "ecmascript.c" +#line 1440 "ecmascript.c" break; case 27: /* assignment_expression ::= lefthand_side_expression AND_ASSIGN assignment_expression */ -#line 122 "ecmascript.lemon" +#line 129 "ecmascript.lemon" { grn_expr_append_op(efsi->ctx, efsi->e, 
GRN_OP_AND_ASSIGN, 2); } -#line 1441 "ecmascript.c" +#line 1447 "ecmascript.c" break; case 28: /* assignment_expression ::= lefthand_side_expression XOR_ASSIGN assignment_expression */ -#line 125 "ecmascript.lemon" +#line 132 "ecmascript.lemon" { grn_expr_append_op(efsi->ctx, efsi->e, GRN_OP_XOR_ASSIGN, 2); } -#line 1448 "ecmascript.c" +#line 1454 "ecmascript.c" break; case 29: /* assignment_expression ::= lefthand_side_expression OR_ASSIGN assignment_expression */ -#line 128 "ecmascript.lemon" +#line 135 "ecmascript.lemon" { grn_expr_append_op(efsi->ctx, efsi->e, GRN_OP_OR_ASSIGN, 2); } -#line 1455 "ecmascript.c" +#line 1461 "ecmascript.c" break; case 31: /* conditional_expression ::= logical_or_expression QUESTION assignment_expression COLON assignment_expression */ -#line 133 "ecmascript.lemon" +#line 140 "ecmascript.lemon" { grn_expr *e = (grn_expr *)efsi->e; e->codes[yymsp[-3].minor.yy0].nargs = yymsp[-1].minor.yy0 - yymsp[-3].minor.yy0; e->codes[yymsp[-1].minor.yy0].nargs = e->codes_curr - yymsp[-1].minor.yy0 - 1; } -#line 1464 "ecmascript.c" +#line 1470 "ecmascript.c" break; case 38: /* bitwise_or_expression ::= bitwise_or_expression BITWISE_OR bitwise_xor_expression */ -#line 153 "ecmascript.lemon" +#line 160 "ecmascript.lemon" { grn_expr_append_op(efsi->ctx, efsi->e, GRN_OP_BITWISE_OR, 2); } -#line 1471 "ecmascript.c" +#line 1477 "ecmascript.c" break; case 40: /* bitwise_xor_expression ::= bitwise_xor_expression BITWISE_XOR bitwise_and_expression */ -#line 158 "ecmascript.lemon" +#line 165 "ecmascript.lemon" { grn_expr_append_op(efsi->ctx, efsi->e, GRN_OP_BITWISE_XOR, 2); } -#line 1478 "ecmascript.c" +#line 1484 "ecmascript.c" break; case 42: /* bitwise_and_expression ::= bitwise_and_expression BITWISE_AND equality_expression */ -#line 163 "ecmascript.lemon" +#line 170 "ecmascript.lemon" { grn_expr_append_op(efsi->ctx, efsi->e, GRN_OP_BITWISE_AND, 2); } -#line 1485 "ecmascript.c" +#line 1491 "ecmascript.c" break; case 44: /* equality_expression ::= 
equality_expression EQUAL relational_expression */ -#line 168 "ecmascript.lemon" +#line 175 "ecmascript.lemon" { grn_expr_append_op(efsi->ctx, efsi->e, GRN_OP_EQUAL, 2); } -#line 1492 "ecmascript.c" +#line 1498 "ecmascript.c" break; case 45: /* equality_expression ::= equality_expression NOT_EQUAL relational_expression */ -#line 171 "ecmascript.lemon" +#line 178 "ecmascript.lemon" { grn_expr_append_op(efsi->ctx, efsi->e, GRN_OP_NOT_EQUAL, 2); } -#line 1499 "ecmascript.c" +#line 1505 "ecmascript.c" break; case 47: /* relational_expression ::= relational_expression LESS shift_expression */ -#line 176 "ecmascript.lemon" +#line 183 "ecmascript.lemon" { grn_expr_append_op(efsi->ctx, efsi->e, GRN_OP_LESS, 2); } -#line 1506 "ecmascript.c" +#line 1512 "ecmascript.c" break; case 48: /* relational_expression ::= relational_expression GREATER shift_expression */ -#line 179 "ecmascript.lemon" +#line 186 "ecmascript.lemon" { grn_expr_append_op(efsi->ctx, efsi->e, GRN_OP_GREATER, 2); } -#line 1513 "ecmascript.c" +#line 1519 "ecmascript.c" break; case 49: /* relational_expression ::= relational_expression LESS_EQUAL shift_expression */ -#line 182 "ecmascript.lemon" +#line 189 "ecmascript.lemon" { grn_expr_append_op(efsi->ctx, efsi->e, GRN_OP_LESS_EQUAL, 2); } -#line 1520 "ecmascript.c" +#line 1526 "ecmascript.c" break; case 50: /* relational_expression ::= relational_expression GREATER_EQUAL shift_expression */ -#line 185 "ecmascript.lemon" +#line 192 "ecmascript.lemon" { grn_expr_append_op(efsi->ctx, efsi->e, GRN_OP_GREATER_EQUAL, 2); } -#line 1527 "ecmascript.c" +#line 1533 "ecmascript.c" break; case 51: /* relational_expression ::= relational_expression IN shift_expression */ -#line 188 "ecmascript.lemon" +#line 195 "ecmascript.lemon" { grn_expr_append_op(efsi->ctx, efsi->e, GRN_OP_IN, 2); } -#line 1534 "ecmascript.c" +#line 1540 "ecmascript.c" break; case 52: /* relational_expression ::= relational_expression MATCH shift_expression */ case 130: /* adjust_match_expression ::= 
IDENTIFIER MATCH STRING */ yytestcase(yyruleno==130); -#line 191 "ecmascript.lemon" +#line 198 "ecmascript.lemon" { grn_expr_append_op(efsi->ctx, efsi->e, GRN_OP_MATCH, 2); } -#line 1542 "ecmascript.c" +#line 1548 "ecmascript.c" break; case 53: /* relational_expression ::= relational_expression NEAR shift_expression */ -#line 194 "ecmascript.lemon" +#line 201 "ecmascript.lemon" { grn_expr_append_op(efsi->ctx, efsi->e, GRN_OP_NEAR, 2); } -#line 1549 "ecmascript.c" +#line 1555 "ecmascript.c" break; case 54: /* relational_expression ::= relational_expression NEAR2 shift_expression */ -#line 197 "ecmascript.lemon" +#line 204 "ecmascript.lemon" { grn_expr_append_op(efsi->ctx, efsi->e, GRN_OP_NEAR2, 2); } -#line 1556 "ecmascript.c" +#line 1562 "ecmascript.c" break; case 55: /* relational_expression ::= relational_expression SIMILAR shift_expression */ -#line 200 "ecmascript.lemon" +#line 207 "ecmascript.lemon" { grn_expr_append_op(efsi->ctx, efsi->e, GRN_OP_SIMILAR, 2); } -#line 1563 "ecmascript.c" +#line 1569 "ecmascript.c" break; case 56: /* relational_expression ::= relational_expression TERM_EXTRACT shift_expression */ -#line 203 "ecmascript.lemon" +#line 210 "ecmascript.lemon" { grn_expr_append_op(efsi->ctx, efsi->e, GRN_OP_TERM_EXTRACT, 2); } -#line 1570 "ecmascript.c" +#line 1576 "ecmascript.c" break; case 57: /* relational_expression ::= relational_expression LCP shift_expression */ -#line 206 "ecmascript.lemon" +#line 213 "ecmascript.lemon" { grn_expr_append_op(efsi->ctx, efsi->e, GRN_OP_LCP, 2); } -#line 1577 "ecmascript.c" +#line 1583 "ecmascript.c" break; case 58: /* relational_expression ::= relational_expression PREFIX shift_expression */ -#line 209 "ecmascript.lemon" +#line 216 "ecmascript.lemon" { grn_expr_append_op(efsi->ctx, efsi->e, GRN_OP_PREFIX, 2); } -#line 1584 "ecmascript.c" +#line 1590 "ecmascript.c" break; case 59: /* relational_expression ::= relational_expression SUFFIX shift_expression */ -#line 212 "ecmascript.lemon" +#line 219 
"ecmascript.lemon" { grn_expr_append_op(efsi->ctx, efsi->e, GRN_OP_SUFFIX, 2); } -#line 1591 "ecmascript.c" +#line 1597 "ecmascript.c" break; case 61: /* shift_expression ::= shift_expression SHIFTL additive_expression */ -#line 217 "ecmascript.lemon" +#line 224 "ecmascript.lemon" { grn_expr_append_op(efsi->ctx, efsi->e, GRN_OP_SHIFTL, 2); } -#line 1598 "ecmascript.c" +#line 1604 "ecmascript.c" break; case 62: /* shift_expression ::= shift_expression SHIFTR additive_expression */ -#line 220 "ecmascript.lemon" +#line 227 "ecmascript.lemon" { grn_expr_append_op(efsi->ctx, efsi->e, GRN_OP_SHIFTR, 2); } -#line 1605 "ecmascript.c" +#line 1611 "ecmascript.c" break; case 63: /* shift_expression ::= shift_expression SHIFTRR additive_expression */ -#line 223 "ecmascript.lemon" +#line 230 "ecmascript.lemon" { grn_expr_append_op(efsi->ctx, efsi->e, GRN_OP_SHIFTRR, 2); } -#line 1612 "ecmascript.c" +#line 1618 "ecmascript.c" break; case 65: /* additive_expression ::= additive_expression PLUS multiplicative_expression */ case 127: /* adjuster ::= adjuster PLUS adjust_expression */ yytestcase(yyruleno==127); -#line 228 "ecmascript.lemon" +#line 235 "ecmascript.lemon" { grn_expr_append_op(efsi->ctx, efsi->e, GRN_OP_PLUS, 2); } -#line 1620 "ecmascript.c" +#line 1626 "ecmascript.c" break; case 66: /* additive_expression ::= additive_expression MINUS multiplicative_expression */ -#line 231 "ecmascript.lemon" +#line 238 "ecmascript.lemon" { grn_expr_append_op(efsi->ctx, efsi->e, GRN_OP_MINUS, 2); } -#line 1627 "ecmascript.c" +#line 1633 "ecmascript.c" break; case 68: /* multiplicative_expression ::= multiplicative_expression STAR unary_expression */ case 129: /* adjust_expression ::= adjust_match_expression STAR DECIMAL */ yytestcase(yyruleno==129); -#line 236 "ecmascript.lemon" +#line 243 "ecmascript.lemon" { grn_expr_append_op(efsi->ctx, efsi->e, GRN_OP_STAR, 2); } -#line 1635 "ecmascript.c" +#line 1641 "ecmascript.c" break; case 69: /* multiplicative_expression ::= 
multiplicative_expression SLASH unary_expression */ -#line 239 "ecmascript.lemon" +#line 246 "ecmascript.lemon" { grn_expr_append_op(efsi->ctx, efsi->e, GRN_OP_SLASH, 2); } -#line 1642 "ecmascript.c" +#line 1648 "ecmascript.c" break; case 70: /* multiplicative_expression ::= multiplicative_expression MOD unary_expression */ -#line 242 "ecmascript.lemon" +#line 249 "ecmascript.lemon" { grn_expr_append_op(efsi->ctx, efsi->e, GRN_OP_MOD, 2); } -#line 1649 "ecmascript.c" +#line 1655 "ecmascript.c" break; case 72: /* unary_expression ::= DELETE unary_expression */ -#line 247 "ecmascript.lemon" +#line 254 "ecmascript.lemon" { grn_expr_append_op(efsi->ctx, efsi->e, GRN_OP_DELETE, 1); } -#line 1656 "ecmascript.c" +#line 1662 "ecmascript.c" break; case 73: /* unary_expression ::= INCR unary_expression */ -#line 250 "ecmascript.lemon" +#line 257 "ecmascript.lemon" { grn_ctx *ctx = efsi->ctx; grn_expr *e = (grn_expr *)(efsi->e); @@ -1673,10 +1679,10 @@ static void yy_reduce( grn_expr_append_op(efsi->ctx, efsi->e, GRN_OP_INCR, 1); } } -#line 1677 "ecmascript.c" +#line 1683 "ecmascript.c" break; case 74: /* unary_expression ::= DECR unary_expression */ -#line 267 "ecmascript.lemon" +#line 274 "ecmascript.lemon" { grn_ctx *ctx = efsi->ctx; grn_expr *e = (grn_expr *)(efsi->e); @@ -1694,66 +1700,66 @@ static void yy_reduce( grn_expr_append_op(efsi->ctx, efsi->e, GRN_OP_DECR, 1); } } -#line 1698 "ecmascript.c" +#line 1704 "ecmascript.c" break; case 75: /* unary_expression ::= PLUS unary_expression */ -#line 284 "ecmascript.lemon" +#line 291 "ecmascript.lemon" { grn_expr_append_op(efsi->ctx, efsi->e, GRN_OP_PLUS, 1); } -#line 1705 "ecmascript.c" +#line 1711 "ecmascript.c" break; case 76: /* unary_expression ::= MINUS unary_expression */ -#line 287 "ecmascript.lemon" +#line 294 "ecmascript.lemon" { grn_expr_append_op(efsi->ctx, efsi->e, GRN_OP_MINUS, 1); } -#line 1712 "ecmascript.c" +#line 1718 "ecmascript.c" break; case 77: /* unary_expression ::= NOT unary_expression */ -#line 290 
"ecmascript.lemon" +#line 297 "ecmascript.lemon" { grn_expr_append_op(efsi->ctx, efsi->e, GRN_OP_NOT, 1); } -#line 1719 "ecmascript.c" +#line 1725 "ecmascript.c" break; case 78: /* unary_expression ::= BITWISE_NOT unary_expression */ -#line 293 "ecmascript.lemon" +#line 300 "ecmascript.lemon" { grn_expr_append_op(efsi->ctx, efsi->e, GRN_OP_BITWISE_NOT, 1); } -#line 1726 "ecmascript.c" +#line 1732 "ecmascript.c" break; case 79: /* unary_expression ::= ADJUST unary_expression */ -#line 296 "ecmascript.lemon" +#line 303 "ecmascript.lemon" { grn_expr_append_op(efsi->ctx, efsi->e, GRN_OP_ADJUST, 1); } -#line 1733 "ecmascript.c" +#line 1739 "ecmascript.c" break; case 80: /* unary_expression ::= EXACT unary_expression */ -#line 299 "ecmascript.lemon" +#line 306 "ecmascript.lemon" { grn_expr_append_op(efsi->ctx, efsi->e, GRN_OP_EXACT, 1); } -#line 1740 "ecmascript.c" +#line 1746 "ecmascript.c" break; case 81: /* unary_expression ::= PARTIAL unary_expression */ -#line 302 "ecmascript.lemon" +#line 309 "ecmascript.lemon" { grn_expr_append_op(efsi->ctx, efsi->e, GRN_OP_PARTIAL, 1); } -#line 1747 "ecmascript.c" +#line 1753 "ecmascript.c" break; case 82: /* unary_expression ::= UNSPLIT unary_expression */ -#line 305 "ecmascript.lemon" +#line 312 "ecmascript.lemon" { grn_expr_append_op(efsi->ctx, efsi->e, GRN_OP_UNSPLIT, 1); } -#line 1754 "ecmascript.c" +#line 1760 "ecmascript.c" break; case 84: /* postfix_expression ::= lefthand_side_expression INCR */ -#line 310 "ecmascript.lemon" +#line 317 "ecmascript.lemon" { grn_ctx *ctx = efsi->ctx; grn_expr *e = (grn_expr *)(efsi->e); @@ -1771,10 +1777,10 @@ static void yy_reduce( grn_expr_append_op(efsi->ctx, efsi->e, GRN_OP_INCR_POST, 1); } } -#line 1775 "ecmascript.c" +#line 1781 "ecmascript.c" break; case 85: /* postfix_expression ::= lefthand_side_expression DECR */ -#line 327 "ecmascript.lemon" +#line 334 "ecmascript.lemon" { grn_ctx *ctx = efsi->ctx; grn_expr *e = (grn_expr *)(efsi->e); @@ -1792,51 +1798,51 @@ static void 
yy_reduce( grn_expr_append_op(efsi->ctx, efsi->e, GRN_OP_DECR_POST, 1); } } -#line 1796 "ecmascript.c" +#line 1802 "ecmascript.c" break; case 88: /* call_expression ::= member_expression arguments */ -#line 348 "ecmascript.lemon" +#line 355 "ecmascript.lemon" { grn_expr_append_op(efsi->ctx, efsi->e, GRN_OP_CALL, yymsp[0].minor.yy0); } -#line 1803 "ecmascript.c" +#line 1809 "ecmascript.c" break; case 113: /* member_expression_part ::= BRACKETL expression BRACKETR */ -#line 384 "ecmascript.lemon" +#line 391 "ecmascript.lemon" { grn_expr_append_op(efsi->ctx, efsi->e, GRN_OP_GET_MEMBER, 2); } -#line 1810 "ecmascript.c" +#line 1816 "ecmascript.c" break; case 115: /* arguments ::= PARENL argument_list PARENR */ -#line 389 "ecmascript.lemon" +#line 396 "ecmascript.lemon" { yygotominor.yy0 = yymsp[-1].minor.yy0; } -#line 1815 "ecmascript.c" +#line 1821 "ecmascript.c" break; case 116: /* argument_list ::= */ -#line 390 "ecmascript.lemon" +#line 397 "ecmascript.lemon" { yygotominor.yy0 = 0; } -#line 1820 "ecmascript.c" +#line 1826 "ecmascript.c" break; case 117: /* argument_list ::= assignment_expression */ -#line 391 "ecmascript.lemon" +#line 398 "ecmascript.lemon" { yygotominor.yy0 = 1; } -#line 1825 "ecmascript.c" +#line 1831 "ecmascript.c" break; case 118: /* argument_list ::= argument_list COMMA assignment_expression */ -#line 392 "ecmascript.lemon" +#line 399 "ecmascript.lemon" { yygotominor.yy0 = yymsp[-2].minor.yy0 + 1; } -#line 1830 "ecmascript.c" +#line 1836 "ecmascript.c" break; case 119: /* output_columns ::= */ -#line 394 "ecmascript.lemon" +#line 401 "ecmascript.lemon" { yygotominor.yy0 = 0; } -#line 1837 "ecmascript.c" +#line 1843 "ecmascript.c" break; case 120: /* output_columns ::= output_column */ -#line 397 "ecmascript.lemon" +#line 404 "ecmascript.lemon" { if (yymsp[0].minor.yy0) { yygotominor.yy0 = 0; @@ -1844,10 +1850,10 @@ static void yy_reduce( yygotominor.yy0 = 1; } } -#line 1848 "ecmascript.c" +#line 1854 "ecmascript.c" break; case 121: /* 
output_columns ::= output_columns COMMA output_column */ -#line 405 "ecmascript.lemon" +#line 412 "ecmascript.lemon" { if (yymsp[0].minor.yy0) { yygotominor.yy0 = yymsp[-2].minor.yy0; @@ -1858,10 +1864,10 @@ static void yy_reduce( yygotominor.yy0 = 1; } } -#line 1862 "ecmascript.c" +#line 1868 "ecmascript.c" break; case 122: /* output_column ::= STAR */ -#line 416 "ecmascript.lemon" +#line 423 "ecmascript.lemon" { grn_ctx *ctx = efsi->ctx; grn_obj *expr = efsi->e; @@ -1899,21 +1905,21 @@ static void yy_reduce( yygotominor.yy0 = GRN_TRUE; } } -#line 1903 "ecmascript.c" +#line 1909 "ecmascript.c" break; case 123: /* output_column ::= NONEXISTENT_COLUMN */ -#line 453 "ecmascript.lemon" +#line 460 "ecmascript.lemon" { yygotominor.yy0 = GRN_TRUE; } -#line 1910 "ecmascript.c" +#line 1916 "ecmascript.c" break; case 124: /* output_column ::= assignment_expression */ -#line 456 "ecmascript.lemon" +#line 463 "ecmascript.lemon" { yygotominor.yy0 = GRN_FALSE; } -#line 1917 "ecmascript.c" +#line 1923 "ecmascript.c" break; default: /* (0) input ::= query */ yytestcase(yyruleno==0); @@ -2027,7 +2033,7 @@ static void yy_syntax_error( ){ grn_expr_parserARG_FETCH; #define TOKEN (yyminor.yy0) -#line 16 "ecmascript.lemon" +#line 17 "ecmascript.lemon" { grn_ctx *ctx = efsi->ctx; @@ -2049,7 +2055,7 @@ static void yy_syntax_error( GRN_OBJ_FIN(ctx, &message); } } -#line 2053 "ecmascript.c" +#line 2059 "ecmascript.c" grn_expr_parserARG_STORE; /* Suppress warning about unused %extra_argument variable */ } diff --git a/storage/mroonga/vendor/groonga/lib/ecmascript.lemon b/storage/mroonga/vendor/groonga/lib/ecmascript.lemon index 669269973dc..322d7ac8264 100644 --- a/storage/mroonga/vendor/groonga/lib/ecmascript.lemon +++ b/storage/mroonga/vendor/groonga/lib/ecmascript.lemon @@ -1,3 +1,4 @@ +/* -*- mode: c; c-basic-offset: 2 -*- */ %name grn_expr_parser %token_prefix GRN_EXPR_TOKEN_ %include { @@ -75,6 +76,12 @@ query_element ::= IDENTIFIER RELATIVE_OP query_element. 
{ GRN_INT32_POP(&efsi->max_interval_stack, max_interval); } break; + case GRN_OP_SIMILAR : + { + int similarity_threshold; + GRN_INT32_POP(&efsi->similarity_threshold_stack, similarity_threshold); + } + break; default : break; } diff --git a/storage/mroonga/vendor/groonga/lib/expr.c b/storage/mroonga/vendor/groonga/lib/expr.c index 8010f5ddbc0..dd70ebecb40 100644 --- a/storage/mroonga/vendor/groonga/lib/expr.c +++ b/storage/mroonga/vendor/groonga/lib/expr.c @@ -343,9 +343,11 @@ grn_ctx_push(grn_ctx *ctx, grn_obj *obj) return GRN_STACK_OVER_FLOW; } -static grn_obj * -const_new(grn_ctx *ctx, grn_expr *e) +grn_obj * +grn_expr_alloc_const(grn_ctx *ctx, grn_obj *expr) { + grn_expr *e = (grn_expr *)expr; + if (!e->consts) { if (!(e->consts = GRN_MALLOCN(grn_obj, GRN_STACK_SIZE))) { ERR(GRN_NO_MEMORY_AVAILABLE, "malloc failed"); @@ -482,7 +484,7 @@ grn_expr_unpack(grn_ctx *ctx, const uint8_t *p, const uint8_t *pe, grn_obj *expr GRN_B_DEC(id, p); code->value = grn_ctx_at(ctx, id); } else { - if (!(v = const_new(ctx, e))) { return NULL; } + if (!(v = grn_expr_alloc_const(ctx, expr))) { return NULL; } p = grn_obj_unpack(ctx, p, pe, object_type, GRN_OBJ_EXPRCONST, v); code->value = v; } @@ -539,6 +541,14 @@ exit : return (grn_obj *)expr; } +/* Pass ownership of `obj` to `expr`. 
*/ +void +grn_expr_take_obj(grn_ctx *ctx, grn_obj *expr, grn_obj *obj) +{ + grn_expr *e = (grn_expr *)expr; + GRN_PTR_PUT(ctx, &(e->objs), obj); +} + /* data flow info */ typedef struct { grn_expr_code *code; @@ -804,6 +814,19 @@ grn_expr_get_var_by_offset(grn_ctx *ctx, grn_obj *expr, unsigned int offset) x = code_->value; \ if (CONSTP(x)) { \ switch (domain) { \ + case GRN_DB_INT32: \ + { \ + int value; \ + value = GRN_INT32_VALUE(x); \ + if (value == (int)0x80000000) { \ + domain = GRN_DB_INT64; \ + x->header.domain = domain; \ + GRN_INT64_SET(ctx, x, -((long long int)value)); \ + } else { \ + GRN_INT32_SET(ctx, x, -value); \ + } \ + } \ + break; \ case GRN_DB_UINT32: \ { \ unsigned int value; \ @@ -1134,7 +1157,7 @@ grn_expr_append_obj(grn_ctx *ctx, grn_obj *expr, grn_obj *obj, grn_operator op, obj = col; type = col->header.type; domain = grn_obj_get_range(ctx, col); - GRN_PTR_PUT(ctx, &(e->objs), col); + grn_expr_take_obj(ctx, (grn_obj *)e, col); } } else { domain = grn_obj_get_range(ctx, obj); @@ -1253,7 +1276,6 @@ grn_expr_append_const(grn_ctx *ctx, grn_obj *expr, grn_obj *obj, grn_operator op, int nargs) { grn_obj *res = NULL; - grn_expr *e = (grn_expr *)expr; GRN_API_ENTER; if (!obj) { ERR(GRN_SYNTAX_ERROR, "constant is null"); @@ -1262,7 +1284,7 @@ grn_expr_append_const(grn_ctx *ctx, grn_obj *expr, grn_obj *obj, if (GRN_DB_OBJP(obj) || GRN_ACCESSORP(obj)) { res = obj; } else { - if ((res = const_new(ctx, e))) { + if ((res = grn_expr_alloc_const(ctx, expr))) { switch (obj->header.type) { case GRN_VOID : case GRN_BULK : @@ -1287,8 +1309,7 @@ static grn_obj * grn_expr_add_str(grn_ctx *ctx, grn_obj *expr, const char *str, unsigned int str_size) { grn_obj *res = NULL; - grn_expr *e = (grn_expr *)expr; - if ((res = const_new(ctx, e))) { + if ((res = grn_expr_alloc_const(ctx, expr))) { GRN_TEXT_INIT(res, 0); grn_bulk_write(ctx, res, str, str_size); res->header.impl_flags |= GRN_OBJ_EXPRCONST; @@ -1312,9 +1333,8 @@ grn_expr_append_const_int(grn_ctx *ctx, grn_obj 
*expr, int i, grn_operator op, int nargs) { grn_obj *res = NULL; - grn_expr *e = (grn_expr *)expr; GRN_API_ENTER; - if ((res = const_new(ctx, e))) { + if ((res = grn_expr_alloc_const(ctx, expr))) { GRN_INT32_INIT(res, 0); GRN_INT32_SET(ctx, res, i); res->header.impl_flags |= GRN_OBJ_EXPRCONST; @@ -2886,7 +2906,7 @@ grn_expr_exec(grn_ctx *ctx, grn_obj *expr, int nargs) if (col->header.type == GRN_BULK) { grn_obj *table = grn_ctx_at(ctx, GRN_OBJ_GET_DOMAIN(rec)); col = grn_obj_column(ctx, table, GRN_BULK_HEAD(col), GRN_BULK_VSIZE(col)); - if (col) { GRN_PTR_PUT(ctx, &e->objs, col); } + if (col) { grn_expr_take_obj(ctx, (grn_obj *)e, col); } } if (col) { res->header.type = GRN_PTR; @@ -3223,7 +3243,7 @@ grn_expr_exec(grn_ctx *ctx, grn_obj *expr, int nargs) if (col->header.type == GRN_BULK) { grn_obj *table = grn_ctx_at(ctx, GRN_OBJ_GET_DOMAIN(rec)); col = grn_obj_column(ctx, table, GRN_BULK_HEAD(col), GRN_BULK_VSIZE(col)); - if (col) { GRN_PTR_PUT(ctx, &e->objs, col); } + if (col) { grn_expr_take_obj(ctx, (grn_obj *)expr, col); } } if (col) { grn_obj_reinit_for(ctx, res, col); @@ -4042,7 +4062,7 @@ grn_expr_get_value(grn_ctx *ctx, grn_obj *expr, int offset) #define DEFAULT_WEIGHT 5 #define DEFAULT_DECAYSTEP 2 #define DEFAULT_MAX_INTERVAL 10 -#define DEFAULT_SIMILARITY_THRESHOLD 10 +#define DEFAULT_SIMILARITY_THRESHOLD 0 #define DEFAULT_TERM_EXTRACT_POLICY 0 #define DEFAULT_WEIGHT_VECTOR_SIZE 4096 @@ -4058,6 +4078,7 @@ struct _grn_scan_info { grn_obj *query; grn_obj *args[8]; int max_interval; + int similarity_threshold; }; #define SI_FREE(si) do {\ @@ -4079,6 +4100,7 @@ struct _grn_scan_info { (si)->flags = SCAN_PUSH;\ (si)->nargs = 0;\ (si)->max_interval = DEFAULT_MAX_INTERVAL;\ + (si)->similarity_threshold = DEFAULT_SIMILARITY_THRESHOLD;\ (si)->start = (st);\ } while (0) @@ -4219,6 +4241,7 @@ grn_scan_info_open(grn_ctx *ctx, int start) si->flags = SCAN_PUSH; si->nargs = 0; si->max_interval = DEFAULT_MAX_INTERVAL; + si->similarity_threshold = 
DEFAULT_SIMILARITY_THRESHOLD; si->start = start; return si; @@ -4309,6 +4332,18 @@ grn_scan_info_set_max_interval(scan_info *si, int max_interval) si->max_interval = max_interval; } +int +grn_scan_info_get_similarity_threshold(scan_info *si) +{ + return si->similarity_threshold; +} + +void +grn_scan_info_set_similarity_threshold(scan_info *si, int similarity_threshold) +{ + si->similarity_threshold = similarity_threshold; +} + grn_bool grn_scan_info_push_arg(scan_info *si, grn_obj *arg) { @@ -4344,9 +4379,14 @@ scan_info_build_find_index_column_index(grn_ctx *ctx, index = ec->value; if (n_rest_codes > 2 && ec[1].value && - ec[1].value->header.domain == GRN_DB_UINT32 && + (ec[1].value->header.domain == GRN_DB_INT32 || + ec[1].value->header.domain == GRN_DB_UINT32) && ec[2].op == GRN_OP_GET_MEMBER) { - sid = GRN_UINT32_VALUE(ec[1].value) + 1; + if (ec[1].value->header.domain == GRN_DB_INT32) { + sid = GRN_INT32_VALUE(ec[1].value) + 1; + } else { + sid = GRN_UINT32_VALUE(ec[1].value) + 1; + } offset = 2; weight = get_weight(ctx, ec + offset); } else { @@ -4525,6 +4565,15 @@ scan_info_build(grn_ctx *ctx, grn_obj *expr, int *n, si->query = *p; } break; + case GRN_OP_SIMILAR : + if (si->nargs == 3 && + *p == si->args[2] && + (*p)->header.domain == GRN_DB_INT32) { + si->similarity_threshold = GRN_INT32_VALUE(*p); + } else { + si->query = *p; + } + break; default : si->query = *p; break; @@ -5135,14 +5184,17 @@ grn_table_select_index(grn_ctx *ctx, grn_obj *table, scan_info *si, } else { optarg.mode = si->op; } + optarg.max_interval = 0; optarg.similarity_threshold = 0; switch (si->op) { case GRN_OP_NEAR : case GRN_OP_NEAR2 : optarg.max_interval = si->max_interval; break; + case GRN_OP_SIMILAR : + optarg.similarity_threshold = si->similarity_threshold; + break; default : - optarg.max_interval = 0; break; } optarg.weight_vector = (int *)GRN_BULK_HEAD(&wv); @@ -5373,6 +5425,7 @@ typedef struct { grn_obj op_stack; grn_obj mode_stack; grn_obj max_interval_stack; + grn_obj 
similarity_threshold_stack; grn_operator default_op; grn_select_optarg opt; grn_operator default_mode; @@ -5935,6 +5988,16 @@ accept_query_string(grn_ctx *ctx, efs_info *efsi, grn_expr_append_op(efsi->ctx, efsi->e, mode, 3); } break; + case GRN_OP_SIMILAR : + { + int similarity_threshold; + similarity_threshold = + grn_int32_value_at(&efsi->similarity_threshold_stack, -1); + grn_expr_append_const_int(efsi->ctx, efsi->e, similarity_threshold, + GRN_OP_PUSH, 1); + grn_expr_append_op(efsi->ctx, efsi->e, mode, 3); + } + break; default : grn_expr_append_op(efsi->ctx, efsi->e, mode, 2); break; @@ -6022,12 +6085,12 @@ get_word_(grn_ctx *ctx, efs_info *q) PARSE(GRN_EXPR_TOKEN_IDENTIFIER); PARSE(GRN_EXPR_TOKEN_RELATIVE_OP); - GRN_PTR_PUT(ctx, &((grn_expr *)(q->e))->objs, c); + grn_expr_take_obj(ctx, q->e, c); GRN_PTR_PUT(ctx, &q->column_stack, c); GRN_INT32_PUT(ctx, &q->mode_stack, mode); return GRN_SUCCESS; - } else if (*end == GRN_QUERY_PREFIX) { + } else if (GRN_TEXT_LEN(&q->buf) > 1 && *end == GRN_QUERY_PREFIX) { q->cur = end + 1; GRN_INT32_PUT(ctx, &q->mode_stack, GRN_OP_PREFIX); break; @@ -6111,6 +6174,9 @@ parse_query(grn_ctx *ctx, efs_info *q) case GRN_OP_NEAR2 : GRN_INT32_PUT(ctx, &q->max_interval_stack, option); break; + case GRN_OP_SIMILAR : + GRN_INT32_PUT(ctx, &q->similarity_threshold_stack, option); + break; default : break; } @@ -6328,13 +6394,13 @@ done : goto exit; } if ((obj = grn_obj_column(ctx, q->table, name, name_size))) { - GRN_PTR_PUT(ctx, &((grn_expr *)q->e)->objs, obj); + grn_expr_take_obj(ctx, q->e, obj); PARSE(GRN_EXPR_TOKEN_IDENTIFIER); grn_expr_append_obj(ctx, q->e, obj, GRN_OP_GET_VALUE, 1); goto exit; } if ((obj = resolve_top_level_name(ctx, name, name_size))) { - GRN_PTR_PUT(ctx, &((grn_expr *)q->e)->objs, obj); + grn_expr_take_obj(ctx, q->e, obj); PARSE(GRN_EXPR_TOKEN_IDENTIFIER); grn_expr_append_obj(ctx, q->e, obj, GRN_OP_PUSH, 1); goto exit; @@ -6758,7 +6824,7 @@ parse_script(grn_ctx *ctx, efs_info *q) rest = rest_float; } else { const 
char *rest64 = rest; - unsigned int uint32 = grn_atoui(q->cur, q->str_end, &rest); + grn_atoui(q->cur, q->str_end, &rest); // checks to see grn_atoi failed (see above NOTE) if ((int64 > UINT32_MAX) || (q->str_end != rest && *rest >= '0' && *rest <= '9')) { @@ -6767,11 +6833,16 @@ parse_script(grn_ctx *ctx, efs_info *q) GRN_INT64_SET(ctx, &int64buf, int64); grn_expr_append_const(ctx, q->e, &int64buf, GRN_OP_PUSH, 1); rest = rest64; + } else if (int64 > INT32_MAX || int64 < INT32_MIN) { + grn_obj int64buf; + GRN_INT64_INIT(&int64buf, 0); + GRN_INT64_SET(ctx, &int64buf, int64); + grn_expr_append_const(ctx, q->e, &int64buf, GRN_OP_PUSH, 1); } else { - grn_obj uint32buf; - GRN_UINT32_INIT(&uint32buf, 0); - GRN_UINT32_SET(ctx, &uint32buf, uint32); - grn_expr_append_const(ctx, q->e, &uint32buf, GRN_OP_PUSH, 1); + grn_obj int32buf; + GRN_INT32_INIT(&int32buf, 0); + GRN_INT32_SET(ctx, &int32buf, (int32_t)int64); + grn_expr_append_const(ctx, q->e, &int32buf, GRN_OP_PUSH, 1); } } PARSE(GRN_EXPR_TOKEN_DECIMAL); @@ -6806,6 +6877,7 @@ grn_expr_parse(grn_ctx *ctx, grn_obj *expr, GRN_INT32_INIT(&efsi.op_stack, GRN_OBJ_VECTOR); GRN_INT32_INIT(&efsi.mode_stack, GRN_OBJ_VECTOR); GRN_INT32_INIT(&efsi.max_interval_stack, GRN_OBJ_VECTOR); + GRN_INT32_INIT(&efsi.similarity_threshold_stack, GRN_OBJ_VECTOR); GRN_PTR_INIT(&efsi.column_stack, GRN_OBJ_VECTOR, GRN_ID_NIL); GRN_PTR_INIT(&efsi.token_stack, GRN_OBJ_VECTOR, GRN_ID_NIL); efsi.e = expr; @@ -6855,6 +6927,7 @@ grn_expr_parse(grn_ctx *ctx, grn_obj *expr, GRN_OBJ_FIN(ctx, &efsi.op_stack); GRN_OBJ_FIN(ctx, &efsi.mode_stack); GRN_OBJ_FIN(ctx, &efsi.max_interval_stack); + GRN_OBJ_FIN(ctx, &efsi.similarity_threshold_stack); GRN_OBJ_FIN(ctx, &efsi.column_stack); GRN_OBJ_FIN(ctx, &efsi.token_stack); GRN_OBJ_FIN(ctx, &efsi.buf); diff --git a/storage/mroonga/vendor/groonga/lib/expr.h b/storage/mroonga/vendor/groonga/lib/expr.h index a42be924f58..67e134e47b9 100644 --- a/storage/mroonga/vendor/groonga/lib/expr.h +++ 
b/storage/mroonga/vendor/groonga/lib/expr.h @@ -1,6 +1,6 @@ /* -*- c-basic-offset: 2 -*- */ /* - Copyright(C) 2013 Brazil + Copyright(C) 2013-2014 Brazil This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public @@ -57,11 +57,14 @@ void grn_scan_info_set_end(scan_info *si, uint32_t end); void grn_scan_info_set_query(scan_info *si, grn_obj *query); int grn_scan_info_get_max_interval(scan_info *si); void grn_scan_info_set_max_interval(scan_info *si, int max_interval); +int grn_scan_info_get_similarity_threshold(scan_info *si); +void grn_scan_info_set_similarity_threshold(scan_info *si, int similarity_threshold); grn_bool grn_scan_info_push_arg(scan_info *si, grn_obj *arg); grn_obj *grn_scan_info_get_arg(grn_ctx *ctx, scan_info *si, int i); int32_t grn_expr_code_get_weight(grn_ctx *ctx, grn_expr_code *ec); -grn_rc grn_expr_get_keywords(grn_ctx *ctx, grn_obj *expr, grn_obj *keywords); +void grn_expr_take_obj(grn_ctx *ctx, grn_obj *expr, grn_obj *obj); +grn_obj *grn_expr_alloc_const(grn_ctx *ctx, grn_obj *expr); #ifdef __cplusplus } diff --git a/storage/mroonga/vendor/groonga/lib/groonga_in.h b/storage/mroonga/vendor/groonga/lib/groonga_in.h index f3fce4782cd..d7ecff2808b 100644 --- a/storage/mroonga/vendor/groonga/lib/groonga_in.h +++ b/storage/mroonga/vendor/groonga/lib/groonga_in.h @@ -50,9 +50,6 @@ #ifdef HAVE_SYS_TIME_H #include #endif /* HAVE_SYS_TIME_H */ -#ifdef HAVE_SYS_TIMEB_H -#include -#endif /* HAVE_SYS_TIMEB_H */ #ifdef HAVE_SYS_RESOURCE_H #include diff --git a/storage/mroonga/vendor/groonga/lib/hash.c b/storage/mroonga/vendor/groonga/lib/hash.c index 79402848f55..fa5854a6a93 100644 --- a/storage/mroonga/vendor/groonga/lib/hash.c +++ b/storage/mroonga/vendor/groonga/lib/hash.c @@ -1609,6 +1609,7 @@ grn_io_hash_init(grn_ctx *ctx, grn_hash *hash, const char *path, hash->normalizer = NULL; header->normalizer = GRN_ID_NIL; } + GRN_PTR_INIT(&(hash->token_filters), GRN_OBJ_VECTOR, GRN_ID_NIL); 
grn_table_queue_init(ctx, &header->queue); hash->obj.header.flags = header->flags; @@ -1683,6 +1684,7 @@ grn_tiny_hash_init(grn_ctx *ctx, grn_hash *hash, const char *path, hash->garbages = GRN_ID_NIL; hash->tokenizer = NULL; hash->normalizer = NULL; + GRN_PTR_INIT(&(hash->token_filters), GRN_OBJ_VECTOR, GRN_ID_NIL); grn_tiny_array_init(ctx, &hash->a, entry_size, GRN_TINY_ARRAY_CLEAR); grn_tiny_bitmap_init(ctx, &hash->bitmap); return GRN_SUCCESS; @@ -1755,6 +1757,7 @@ grn_hash_open(grn_ctx *ctx, const char *path) } else { hash->normalizer = grn_ctx_at(ctx, header->normalizer); } + GRN_PTR_INIT(&(hash->token_filters), GRN_OBJ_VECTOR, GRN_ID_NIL); hash->obj.header.flags = header->flags; return hash; } else { @@ -1779,6 +1782,8 @@ grn_tiny_hash_fin(grn_ctx *ctx, grn_hash *hash) return GRN_INVALID_ARGUMENT; } + GRN_OBJ_FIN(ctx, &(hash->token_filters)); + if (hash->obj.header.flags & GRN_OBJ_KEY_VAR_SIZE) { uint32_t num_remaining_entries = *hash->n_entries; grn_id *hash_ptr; @@ -1808,6 +1813,7 @@ grn_hash_close(grn_ctx *ctx, grn_hash *hash) if (!ctx || !hash) { return GRN_INVALID_ARGUMENT; } if (grn_hash_is_io_hash(hash)) { rc = grn_io_close(ctx, hash->io); + GRN_PTR_INIT(&(hash->token_filters), GRN_OBJ_VECTOR, GRN_ID_NIL); } else { GRN_ASSERT(ctx == hash->ctx); rc = grn_tiny_hash_fin(ctx, hash); diff --git a/storage/mroonga/vendor/groonga/lib/hash.h b/storage/mroonga/vendor/groonga/lib/hash.h index 59cdec1223b..6f2e68de246 100644 --- a/storage/mroonga/vendor/groonga/lib/hash.h +++ b/storage/mroonga/vendor/groonga/lib/hash.h @@ -206,6 +206,7 @@ struct _grn_hash { uint32_t *max_offset; grn_obj *tokenizer; grn_obj *normalizer; + grn_obj token_filters; /* For grn_io_hash. 
*/ grn_io *io; diff --git a/storage/mroonga/vendor/groonga/lib/ii.c b/storage/mroonga/vendor/groonga/lib/ii.c index f3b67628818..5dc203865ad 100644 --- a/storage/mroonga/vendor/groonga/lib/ii.c +++ b/storage/mroonga/vendor/groonga/lib/ii.c @@ -1,5 +1,5 @@ /* -*- c-basic-offset: 2 -*- */ -/* Copyright(C) 2009-2012 Brazil +/* Copyright(C) 2009-2014 Brazil This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public @@ -3467,7 +3467,8 @@ _grn_ii_create(grn_ctx *ctx, grn_ii *ii, const char *path, grn_obj *lexicon, uin free_histogram[i] = 0; } */ - if (grn_table_get_info(ctx, lexicon, &lflags, &encoding, &tokenizer, NULL)) { + if (grn_table_get_info(ctx, lexicon, &lflags, &encoding, &tokenizer, + NULL, NULL)) { return NULL; } if (path && strlen(path) + 6 >= PATH_MAX) { return NULL; } @@ -3589,7 +3590,8 @@ grn_ii_open(grn_ctx *ctx, const char *path, grn_obj *lexicon) grn_obj_flags lflags; grn_encoding encoding; grn_obj *tokenizer; - if (grn_table_get_info(ctx, lexicon, &lflags, &encoding, &tokenizer, NULL)) { + if (grn_table_get_info(ctx, lexicon, &lflags, &encoding, &tokenizer, + NULL, NULL)) { return NULL; } if (strlen(path) + 6 >= PATH_MAX) { return NULL; } @@ -4612,26 +4614,26 @@ index_add(grn_ctx *ctx, grn_id rid, grn_obj *lexicon, grn_ii *ii, grn_vgram *vgr { grn_hash *h; unsigned int token_flags = 0; - grn_token *token; + grn_token_cursor *token_cursor; grn_ii_updspec **u; grn_id tid, *tp; grn_rc r, rc = GRN_SUCCESS; grn_vgram_buf *sbuf = NULL; if (!rid) { return GRN_INVALID_ARGUMENT; } - if (!(token = grn_token_open(ctx, lexicon, value, value_len, - GRN_TOKEN_ADD, token_flags))) { + if (!(token_cursor = grn_token_cursor_open(ctx, lexicon, value, value_len, + GRN_TOKEN_ADD, token_flags))) { return GRN_NO_MEMORY_AVAILABLE; } if (vgram) { sbuf = grn_vgram_buf_open(value_len); } h = grn_hash_create(ctx, NULL, sizeof(grn_id), sizeof(grn_ii_updspec *), GRN_HASH_TINY); if (!h) { GRN_LOG(ctx, GRN_LOG_ALERT, 
"grn_hash_create on index_add failed !"); - grn_token_close(ctx, token); + grn_token_cursor_close(ctx, token_cursor); if (sbuf) { grn_vgram_buf_close(sbuf); } return GRN_NO_MEMORY_AVAILABLE; } - while (!token->status) { - (tid = grn_token_next(ctx, token)); + while (!token_cursor->status) { + (tid = grn_token_cursor_next(ctx, token_cursor)); if (tid) { if (!grn_hash_add(ctx, h, &tid, sizeof(grn_id), (void **) &u, NULL)) { break; } if (!*u) { @@ -4640,14 +4642,14 @@ index_add(grn_ctx *ctx, grn_id rid, grn_obj *lexicon, grn_ii *ii, grn_vgram *vgr goto exit; } } - if (grn_ii_updspec_add(ctx, *u, token->pos, 0)) { + if (grn_ii_updspec_add(ctx, *u, token_cursor->pos, 0)) { GRN_LOG(ctx, GRN_LOG_ERROR, "grn_ii_updspec_add on index_add failed!"); goto exit; } if (sbuf) { grn_vgram_buf_add(sbuf, tid); } } } - grn_token_close(ctx, token); + grn_token_cursor_close(ctx, token_cursor); // todo : support vgram // if (sbuf) { grn_vgram_update(vgram, rid, sbuf, (grn_set *)h); } GRN_HASH_EACH(ctx, h, id, &tp, NULL, &u, { @@ -4659,7 +4661,7 @@ index_add(grn_ctx *ctx, grn_id rid, grn_obj *lexicon, grn_ii *ii, grn_vgram *vgr return rc; exit: grn_hash_close(ctx, h); - grn_token_close(ctx, token); + grn_token_cursor_close(ctx, token_cursor); if (sbuf) { grn_vgram_buf_close(sbuf); } return GRN_NO_MEMORY_AVAILABLE; } @@ -4670,34 +4672,34 @@ index_del(grn_ctx *ctx, grn_id rid, grn_obj *lexicon, grn_ii *ii, grn_vgram *vgr { grn_hash *h; unsigned int token_flags = 0; - grn_token *token; + grn_token_cursor *token_cursor; grn_ii_updspec **u; grn_id tid, *tp; if (!rid) { return GRN_INVALID_ARGUMENT; } - if (!(token = grn_token_open(ctx, lexicon, value, value_len, - GRN_TOKEN_DEL, token_flags))) { + if (!(token_cursor = grn_token_cursor_open(ctx, lexicon, value, value_len, + GRN_TOKEN_DEL, token_flags))) { return GRN_NO_MEMORY_AVAILABLE; } h = grn_hash_create(ctx, NULL, sizeof(grn_id), sizeof(grn_ii_updspec *), GRN_HASH_TINY); if (!h) { GRN_LOG(ctx, GRN_LOG_ALERT, "grn_hash_create on index_del 
failed !"); - grn_token_close(ctx, token); + grn_token_cursor_close(ctx, token_cursor); return GRN_NO_MEMORY_AVAILABLE; } - while (!token->status) { - if ((tid = grn_token_next(ctx, token))) { + while (!token_cursor->status) { + if ((tid = grn_token_cursor_next(ctx, token_cursor))) { if (!grn_hash_add(ctx, h, &tid, sizeof(grn_id), (void **) &u, NULL)) { break; } if (!*u) { if (!(*u = grn_ii_updspec_open(ctx, rid, 0))) { GRN_LOG(ctx, GRN_LOG_ALERT, "grn_ii_updspec_open on index_del failed !"); grn_hash_close(ctx, h); - grn_token_close(ctx, token); + grn_token_cursor_close(ctx, token_cursor); return GRN_NO_MEMORY_AVAILABLE; } } } } - grn_token_close(ctx, token); + grn_token_cursor_close(ctx, token_cursor); GRN_HASH_EACH(ctx, h, id, &tp, NULL, &u, { if (*tp) { grn_ii_delete_one(ctx, ii, *tp, *u, NULL); @@ -4736,7 +4738,7 @@ grn_ii_update(grn_ctx *ctx, grn_ii *ii, grn_id rid, grn_vgram *vgram, unsigned i int j; grn_value *v; unsigned int token_flags = 0; - grn_token *token; + grn_token_cursor *token_cursor; grn_rc rc = GRN_SUCCESS; grn_hash *old, *new; grn_id tid, *tp; @@ -4754,32 +4756,32 @@ grn_ii_update(grn_ctx *ctx, grn_ii *ii, grn_id rid, grn_vgram *vgram, unsigned i goto exit; } for (j = newvalues->n_values, v = newvalues->values; j; j--, v++) { - if ((token = grn_token_open(ctx, lexicon, v->str, v->str_len, - GRN_TOKEN_ADD, token_flags))) { - while (!token->status) { - if ((tid = grn_token_next(ctx, token))) { + if ((token_cursor = grn_token_cursor_open(ctx, lexicon, v->str, v->str_len, + GRN_TOKEN_ADD, token_flags))) { + while (!token_cursor->status) { + if ((tid = grn_token_cursor_next(ctx, token_cursor))) { if (!grn_hash_add(ctx, new, &tid, sizeof(grn_id), (void **) &u, NULL)) { break; } if (!*u) { if (!(*u = grn_ii_updspec_open(ctx, rid, section))) { GRN_LOG(ctx, GRN_LOG_ALERT, "grn_ii_updspec_open on grn_ii_update failed!"); - grn_token_close(ctx, token); + grn_token_cursor_close(ctx, token_cursor); grn_hash_close(ctx, new); rc = GRN_NO_MEMORY_AVAILABLE; 
goto exit; } } - if (grn_ii_updspec_add(ctx, *u, token->pos, v->weight)) { + if (grn_ii_updspec_add(ctx, *u, token_cursor->pos, v->weight)) { GRN_LOG(ctx, GRN_LOG_ALERT, "grn_ii_updspec_add on grn_ii_update failed!"); - grn_token_close(ctx, token); + grn_token_cursor_close(ctx, token_cursor); grn_hash_close(ctx, new); rc = GRN_NO_MEMORY_AVAILABLE; goto exit; } } } - grn_token_close(ctx, token); + grn_token_cursor_close(ctx, token_cursor); } } if (!GRN_HASH_SIZE(new)) { @@ -4798,26 +4800,26 @@ grn_ii_update(grn_ctx *ctx, grn_ii *ii, grn_id rid, grn_vgram *vgram, unsigned i goto exit; } for (j = oldvalues->n_values, v = oldvalues->values; j; j--, v++) { - if ((token = grn_token_open(ctx, lexicon, v->str, v->str_len, - GRN_TOKEN_DEL, token_flags))) { - while (!token->status) { - if ((tid = grn_token_next(ctx, token))) { + if ((token_cursor = grn_token_cursor_open(ctx, lexicon, v->str, v->str_len, + GRN_TOKEN_DEL, token_flags))) { + while (!token_cursor->status) { + if ((tid = grn_token_cursor_next(ctx, token_cursor))) { if (!grn_hash_add(ctx, old, &tid, sizeof(grn_id), (void **) &u, NULL)) { break; } if (!*u) { if (!(*u = grn_ii_updspec_open(ctx, rid, section))) { GRN_LOG(ctx, GRN_LOG_ALERT, "grn_ii_updspec_open on grn_ii_update failed!"); - grn_token_close(ctx, token); + grn_token_cursor_close(ctx, token_cursor); if (new) { grn_hash_close(ctx, new); }; grn_hash_close(ctx, old); rc = GRN_NO_MEMORY_AVAILABLE; goto exit; } } - if (grn_ii_updspec_add(ctx, *u, token->pos, v->weight)) { + if (grn_ii_updspec_add(ctx, *u, token_cursor->pos, v->weight)) { GRN_LOG(ctx, GRN_LOG_ALERT, "grn_ii_updspec_add on grn_ii_update failed!"); - grn_token_close(ctx, token); + grn_token_cursor_close(ctx, token_cursor); if (new) { grn_hash_close(ctx, new); }; grn_hash_close(ctx, old); rc = GRN_NO_MEMORY_AVAILABLE; @@ -4825,7 +4827,7 @@ grn_ii_update(grn_ctx *ctx, grn_ii *ii, grn_id rid, grn_vgram *vgram, unsigned i } } } - grn_token_close(ctx, token); + grn_token_cursor_close(ctx, 
token_cursor); } } } else { @@ -4870,7 +4872,7 @@ grn_vector2updspecs(grn_ctx *ctx, grn_ii *ii, grn_id rid, unsigned int section, int j; grn_id tid; grn_section *v; - grn_token *token; + grn_token_cursor *token_cursor; grn_ii_updspec **u; grn_hash *h = (grn_hash *)out; grn_obj *lexicon = ii->lexicon; @@ -4879,10 +4881,10 @@ grn_vector2updspecs(grn_ctx *ctx, grn_ii *ii, grn_id rid, unsigned int section, for (j = in->u.v.n_sections, v = in->u.v.sections; j; j--, v++) { unsigned int token_flags = 0; if (v->length && - (token = grn_token_open(ctx, lexicon, head + v->offset, v->length, - mode, token_flags))) { - while (!token->status) { - if ((tid = grn_token_next(ctx, token))) { + (token_cursor = grn_token_cursor_open(ctx, lexicon, head + v->offset, v->length, + mode, token_flags))) { + while (!token_cursor->status) { + if ((tid = grn_token_cursor_next(ctx, token_cursor))) { if (posting) { GRN_RECORD_PUT(ctx, posting, tid); } if (!grn_hash_add(ctx, h, &tid, sizeof(grn_id), (void **) &u, NULL)) { break; @@ -4890,18 +4892,18 @@ grn_vector2updspecs(grn_ctx *ctx, grn_ii *ii, grn_id rid, unsigned int section, if (!*u) { if (!(*u = grn_ii_updspec_open(ctx, rid, section))) { GRN_LOG(ctx, GRN_LOG_ALERT, "grn_ii_updspec_open on grn_ii_update failed!"); - grn_token_close(ctx, token); + grn_token_cursor_close(ctx, token_cursor); return GRN_NO_MEMORY_AVAILABLE; } } - if (grn_ii_updspec_add(ctx, *u, token->pos, v->weight)) { + if (grn_ii_updspec_add(ctx, *u, token_cursor->pos, v->weight)) { GRN_LOG(ctx, GRN_LOG_ALERT, "grn_ii_updspec_add on grn_ii_update failed!"); - grn_token_close(ctx, token); + grn_token_cursor_close(ctx, token_cursor); return GRN_NO_MEMORY_AVAILABLE; } } } - grn_token_close(ctx, token); + grn_token_cursor_close(ctx, token_cursor); } } } @@ -5353,18 +5355,22 @@ token_compare(const void *a, const void *b) inline static grn_rc token_info_build(grn_ctx *ctx, grn_obj *lexicon, grn_ii *ii, const char *string, unsigned int string_len, - token_info **tis, uint32_t *n, 
grn_operator mode) + token_info **tis, uint32_t *n, grn_bool *only_skip_token, + grn_operator mode) { token_info *ti; const char *key; uint32_t size; grn_rc rc = GRN_END_OF_DATA; unsigned int token_flags = GRN_TOKEN_ENABLE_TOKENIZED_DELIMITER; - grn_token *token = grn_token_open(ctx, lexicon, string, string_len, - GRN_TOKEN_GET, token_flags); - if (!token) { return GRN_NO_MEMORY_AVAILABLE; } + grn_token_cursor *token_cursor = grn_token_cursor_open(ctx, lexicon, + string, string_len, + GRN_TOKEN_GET, + token_flags); + *only_skip_token = GRN_FALSE; + if (!token_cursor) { return GRN_NO_MEMORY_AVAILABLE; } if (mode == GRN_OP_UNSPLIT) { - if ((ti = token_info_open(ctx, lexicon, ii, (char *)token->orig, token->orig_blen, 0, EX_BOTH))) { + if ((ti = token_info_open(ctx, lexicon, ii, (char *)token_cursor->orig, token_cursor->orig_blen, 0, EX_BOTH))) { tis[(*n)++] = ti; rc = GRN_SUCCESS; } @@ -5385,48 +5391,53 @@ token_info_build(grn_ctx *ctx, grn_obj *lexicon, grn_ii *ii, const char *string, ef = EX_NONE; break; } - tid = grn_token_next(ctx, token); - if (token->force_prefix) { ef |= EX_PREFIX; } - switch (token->status) { + tid = grn_token_cursor_next(ctx, token_cursor); + if (token_cursor->force_prefix) { ef |= EX_PREFIX; } + switch (token_cursor->status) { case GRN_TOKEN_DOING : key = _grn_table_key(ctx, lexicon, tid, &size); - ti = token_info_open(ctx, lexicon, ii, key, size, token->pos, ef & EX_SUFFIX); + ti = token_info_open(ctx, lexicon, ii, key, size, token_cursor->pos, ef & EX_SUFFIX); break; case GRN_TOKEN_DONE : - ti = token_info_open(ctx, lexicon, ii, (const char *)token->curr, - token->curr_size, 0, ef); + ti = token_info_open(ctx, lexicon, ii, (const char *)token_cursor->curr, + token_cursor->curr_size, 0, ef); /* key = _grn_table_key(ctx, lexicon, tid, &size); - ti = token_info_open(ctx, lexicon, ii, token->curr, token->curr_size, token->pos, ef); - ti = token_info_open(ctx, lexicon, ii, (char *)token->orig, - token->orig_blen, token->pos, ef); + ti = 
token_info_open(ctx, lexicon, ii, token_cursor->curr, token_cursor->curr_size, token_cursor->pos, ef); + ti = token_info_open(ctx, lexicon, ii, (char *)token_cursor->orig, + token_cursor->orig_blen, token_cursor->pos, ef); */ break; case GRN_TOKEN_NOT_FOUND : - ti = token_info_open(ctx, lexicon, ii, (char *)token->orig, - token->orig_blen, 0, ef); + ti = token_info_open(ctx, lexicon, ii, (char *)token_cursor->orig, + token_cursor->orig_blen, 0, ef); break; + case GRN_TOKEN_DONE_SKIP : + *only_skip_token = GRN_TRUE; + goto exit; default : goto exit; } if (!ti) { goto exit ; } tis[(*n)++] = ti; - while (token->status == GRN_TOKEN_DOING) { - tid = grn_token_next(ctx, token); - switch (token->status) { + while (token_cursor->status == GRN_TOKEN_DOING) { + tid = grn_token_cursor_next(ctx, token_cursor); + switch (token_cursor->status) { + case GRN_TOKEN_DONE_SKIP : + continue; case GRN_TOKEN_DOING : key = _grn_table_key(ctx, lexicon, tid, &size); - ti = token_info_open(ctx, lexicon, ii, key, size, token->pos, EX_NONE); + ti = token_info_open(ctx, lexicon, ii, key, size, token_cursor->pos, EX_NONE); break; case GRN_TOKEN_DONE : if (tid) { key = _grn_table_key(ctx, lexicon, tid, &size); - ti = token_info_open(ctx, lexicon, ii, key, size, token->pos, ef & EX_PREFIX); + ti = token_info_open(ctx, lexicon, ii, key, size, token_cursor->pos, ef & EX_PREFIX); break; } /* else fallthru */ default : - ti = token_info_open(ctx, lexicon, ii, (char *)token->curr, - token->curr_size, token->pos, ef & EX_PREFIX); + ti = token_info_open(ctx, lexicon, ii, (char *)token_cursor->curr, + token_cursor->curr_size, token_cursor->pos, ef & EX_PREFIX); break; } if (!ti) { goto exit; } @@ -5435,7 +5446,7 @@ token_info_build(grn_ctx *ctx, grn_obj *lexicon, grn_ii *ii, const char *string, rc = GRN_SUCCESS; } exit : - grn_token_close(ctx, token); + grn_token_cursor_close(ctx, token_cursor); return rc; } @@ -5644,35 +5655,36 @@ grn_ii_similar_search(grn_ctx *ctx, grn_ii *ii, grn_id tid, *tp, 
max_size; grn_rc rc = GRN_SUCCESS; grn_hash *h; - grn_token *token; + grn_token_cursor *token_cursor; unsigned int token_flags = GRN_TOKEN_ENABLE_TOKENIZED_DELIMITER; grn_obj *lexicon = ii->lexicon; if (!lexicon || !ii || !string || !string_len || !s || !optarg) { return GRN_INVALID_ARGUMENT; } if (!(h = grn_hash_create(ctx, NULL, sizeof(grn_id), sizeof(int), 0))) { return GRN_NO_MEMORY_AVAILABLE; } - if (!(token = grn_token_open(ctx, lexicon, string, string_len, - GRN_TOKEN_GET, token_flags))) { + if (!(token_cursor = grn_token_cursor_open(ctx, lexicon, string, string_len, + GRN_TOKEN_GET, token_flags))) { grn_hash_close(ctx, h); return GRN_NO_MEMORY_AVAILABLE; } if (!(max_size = optarg->max_size)) { max_size = 1048576; } - while (token->status != GRN_TOKEN_DONE) { - if ((tid = grn_token_next(ctx, token))) { + while (token_cursor->status != GRN_TOKEN_DONE && + token_cursor->status != GRN_TOKEN_DONE_SKIP) { + if ((tid = grn_token_cursor_next(ctx, token_cursor))) { if (grn_hash_add(ctx, h, &tid, sizeof(grn_id), (void **)&w1, NULL)) { (*w1)++; } } - if (tid && token->curr_size) { + if (tid && token_cursor->curr_size) { if (optarg->max_interval == GRN_OP_UNSPLIT) { - grn_table_search(ctx, lexicon, token->curr, token->curr_size, + grn_table_search(ctx, lexicon, token_cursor->curr, token_cursor->curr_size, GRN_OP_PREFIX, (grn_obj *)h, GRN_OP_OR); } if (optarg->max_interval == GRN_OP_PARTIAL) { - grn_table_search(ctx, lexicon, token->curr, token->curr_size, + grn_table_search(ctx, lexicon, token_cursor->curr, token_cursor->curr_size, GRN_OP_SUFFIX, (grn_obj *)h, GRN_OP_OR); } } } - grn_token_close(ctx, token); + grn_token_cursor_close(ctx, token_cursor); { grn_hash_cursor *c = grn_hash_cursor_open(ctx, h, NULL, 0, NULL, 0, 0, -1, 0); if (!c) { @@ -5702,7 +5714,12 @@ grn_ii_similar_search(grn_ctx *ctx, grn_ii *ii, grn_ii_cursor *c; grn_ii_posting *pos; grn_wv_mode wvm = grn_wv_none; - grn_table_sort_optarg arg = {GRN_TABLE_SORT_DESC, NULL, (void *)sizeof(grn_id), 0}; + 
grn_table_sort_optarg arg = { + GRN_TABLE_SORT_DESC|GRN_TABLE_SORT_BY_VALUE|GRN_TABLE_SORT_AS_NUMBER, + NULL, + NULL, + 0 + }; grn_array *sorted = grn_array_create(ctx, NULL, sizeof(grn_id), 0); if (!sorted) { GRN_LOG(ctx, GRN_LOG_ALERT, "grn_hash_sort on grn_ii_similar_search failed !"); @@ -5851,6 +5868,7 @@ grn_ii_select(grn_ctx *ctx, grn_ii *ii, const char *string, unsigned int string_ int rep, orp, weight, max_interval = 0; token_info *ti, **tis = NULL, **tip, **tie; uint32_t n = 0, rid, sid, nrid, nsid; + grn_bool only_skip_token = GRN_FALSE; grn_operator mode = GRN_OP_EXACT; grn_wv_mode wvm = grn_wv_none; grn_obj *lexicon = ii->lexicon; @@ -5879,7 +5897,7 @@ grn_ii_select(grn_ctx *ctx, grn_ii *ii, const char *string, unsigned int string_ if (!(tis = GRN_MALLOC(sizeof(token_info *) * string_len * 2))) { return GRN_NO_MEMORY_AVAILABLE; } - if (token_info_build(ctx, lexicon, ii, string, string_len, tis, &n, mode) || !n) { goto exit; } + if (token_info_build(ctx, lexicon, ii, string, string_len, tis, &n, &only_skip_token, mode) || !n) { goto exit; } switch (mode) { case GRN_OP_NEAR2 : token_info_clear_offset(tis, n); @@ -6005,7 +6023,9 @@ exit : if (*tip) { token_info_close(ctx, *tip); } } if (tis) { GRN_FREE(tis); } - grn_ii_resolve_sel_and(ctx, s, op); + if (!only_skip_token) { + grn_ii_resolve_sel_and(ctx, s, op); + } // grn_hash_cursor_clear(r); bt_close(ctx, bt); #ifdef DEBUG @@ -6037,6 +6057,7 @@ grn_ii_sel(grn_ctx *ctx, grn_ii *ii, const char *string, unsigned int string_len break; case GRN_OP_SIMILAR : arg.mode = optarg->mode; + arg.similarity_threshold = optarg->similarity_threshold; break; default : break; @@ -6700,7 +6721,7 @@ get_tmp_lexicon(grn_ctx *ctx, grn_ii_buffer *ii_buffer) grn_obj *normalizer; grn_obj_flags flags; grn_table_get_info(ctx, ii_buffer->lexicon, &flags, NULL, - &tokenizer, &normalizer); + &tokenizer, &normalizer, NULL); flags &= ~GRN_OBJ_PERSISTENT; tmp_lexicon = grn_table_create(ctx, NULL, 0, NULL, flags, domain, range); if 
(tmp_lexicon) { @@ -6756,7 +6777,7 @@ grn_ii_buffer_tokenize(grn_ctx *ctx, grn_ii_buffer *ii_buffer, grn_id rid, } if ((tmp_lexicon = get_tmp_lexicon(ctx, ii_buffer))) { unsigned int token_flags = 0; - grn_token *token; + grn_token_cursor *token_cursor; grn_id *buffer = ii_buffer->block_buf; uint32_t block_pos = ii_buffer->block_pos; buffer[block_pos++] = rid + II_BUFFER_RID_FLAG; @@ -6766,12 +6787,13 @@ grn_ii_buffer_tokenize(grn_ctx *ctx, grn_ii_buffer *ii_buffer, grn_id rid, if (weight) { buffer[block_pos++] = weight + II_BUFFER_WEIGHT_FLAG; } - if ((token = grn_token_open(ctx, tmp_lexicon, value, - value_len, GRN_TOKEN_ADD, token_flags))) { + if ((token_cursor = grn_token_cursor_open(ctx, tmp_lexicon, + value, value_len, + GRN_TOKEN_ADD, token_flags))) { uint32_t pos; - for (pos = 0; !token->status; pos++) { + for (pos = 0; !token_cursor->status; pos++) { grn_id tid; - if ((tid = grn_token_next(ctx, token))) { + if ((tid = grn_token_cursor_next(ctx, token_cursor))) { ii_buffer_counter *counter; counter = get_buffer_counter(ctx, ii_buffer, tmp_lexicon, tid); if (!counter) { return; } @@ -6810,7 +6832,7 @@ grn_ii_buffer_tokenize(grn_ctx *ctx, grn_ii_buffer *ii_buffer, grn_id rid, counter->nposts++; } } - grn_token_close(ctx, token); + grn_token_cursor_close(ctx, token_cursor); } ii_buffer->block_pos = block_pos; } @@ -7151,7 +7173,7 @@ grn_ii_buffer_open(grn_ctx *ctx, grn_ii *ii, S_IRUSR|S_IWUSR); if (ii_buffer->tmpfd != -1) { grn_obj_flags flags; - grn_table_get_info(ctx, ii->lexicon, &flags, NULL, NULL, NULL); + grn_table_get_info(ctx, ii->lexicon, &flags, NULL, NULL, NULL, NULL); if ((flags & GRN_OBJ_TABLE_TYPE_MASK) == GRN_OBJ_TABLE_PAT_KEY) { grn_pat_cache_enable(ctx, (grn_pat *)ii->lexicon, PAT_CACHE_SIZE); @@ -7274,7 +7296,7 @@ grn_ii_buffer_close(grn_ctx *ctx, grn_ii_buffer *ii_buffer) { uint32_t i; grn_obj_flags flags; - grn_table_get_info(ctx, ii_buffer->ii->lexicon, &flags, NULL, NULL, NULL); + grn_table_get_info(ctx, ii_buffer->ii->lexicon, &flags, 
NULL, NULL, NULL, NULL); if ((flags & GRN_OBJ_TABLE_TYPE_MASK) == GRN_OBJ_TABLE_PAT_KEY) { grn_pat_cache_disable(ctx, (grn_pat *)ii_buffer->ii->lexicon); } diff --git a/storage/mroonga/vendor/groonga/lib/io.h b/storage/mroonga/vendor/groonga/lib/io.h index 27538b381e2..04c75919c5f 100644 --- a/storage/mroonga/vendor/groonga/lib/io.h +++ b/storage/mroonga/vendor/groonga/lib/io.h @@ -75,6 +75,7 @@ typedef struct { #if defined(WIN32) && defined(WIN32_FMO_EACH) HANDLE fmo; #endif /* defined(WIN32) && defined(WIN32_FMO_EACH) */ + void *value; } grn_io_win; typedef struct { diff --git a/storage/mroonga/vendor/groonga/lib/mrb.h b/storage/mroonga/vendor/groonga/lib/mrb.h index 4cff30ef1e9..fe51e5e7670 100644 --- a/storage/mroonga/vendor/groonga/lib/mrb.h +++ b/storage/mroonga/vendor/groonga/lib/mrb.h @@ -31,9 +31,9 @@ extern "C" { #endif #ifdef GRN_WITH_MRUBY -mrb_value grn_mrb_eval(grn_ctx *ctx, const char *script, int script_length); -mrb_value grn_mrb_load(grn_ctx *ctx, const char *path); -grn_rc grn_mrb_to_grn(grn_ctx *ctx, mrb_value mrb_object, grn_obj *grn_object); +GRN_API mrb_value grn_mrb_eval(grn_ctx *ctx, const char *script, int script_length); +GRN_API mrb_value grn_mrb_load(grn_ctx *ctx, const char *path); +GRN_API grn_rc grn_mrb_to_grn(grn_ctx *ctx, mrb_value mrb_object, grn_obj *grn_object); #endif #ifdef __cplusplus diff --git a/storage/mroonga/vendor/groonga/lib/mrb/mrb_ctx.c b/storage/mroonga/vendor/groonga/lib/mrb/mrb_ctx.c index 81a584d0f95..3f7bc36202d 100644 --- a/storage/mroonga/vendor/groonga/lib/mrb/mrb_ctx.c +++ b/storage/mroonga/vendor/groonga/lib/mrb/mrb_ctx.c @@ -34,9 +34,17 @@ ctx_class_instance(mrb_state *mrb, mrb_value klass) { grn_ctx *ctx = (grn_ctx *)mrb->ud; mrb_value mrb_ctx; - - mrb_ctx = mrb_obj_value(mrb_obj_alloc(mrb, MRB_TT_DATA, mrb_class_ptr(klass))); - DATA_PTR(mrb_ctx) = ctx; + mrb_sym iv_name; + + iv_name = mrb_intern_lit(mrb, "@instance"); + mrb_ctx = mrb_iv_get(mrb, klass, iv_name); + if (mrb_nil_p(mrb_ctx)) { + struct 
RBasic *raw_mrb_ctx; + raw_mrb_ctx = mrb_obj_alloc(mrb, MRB_TT_DATA, mrb_class_ptr(klass)); + mrb_ctx = mrb_obj_value(raw_mrb_ctx); + DATA_PTR(mrb_ctx) = ctx; + mrb_iv_set(mrb, klass, iv_name, mrb_ctx); + } return mrb_ctx; } @@ -179,6 +187,471 @@ ctx_set_error_message(mrb_state *mrb, mrb_value self) return error_message; } +void +grn_mrb_ctx_check(mrb_state *mrb) +{ + grn_ctx *ctx = (grn_ctx *)mrb->ud; + grn_mrb_data *data = &(ctx->impl->mrb); + struct RClass *module = data->module; + struct RClass *error_class; +#define MESSAGE_SIZE 4096 + char message[MESSAGE_SIZE]; + + switch (ctx->rc) { + case GRN_SUCCESS: + return; + case GRN_END_OF_DATA: + error_class = mrb_class_get_under(mrb, module, "EndOfData"); + snprintf(message, MESSAGE_SIZE, + "end of data: <%s>(%d)", + ctx->errbuf, ctx->rc); + break; + case GRN_UNKNOWN_ERROR: + error_class = mrb_class_get_under(mrb, module, "UnknownError"); + snprintf(message, MESSAGE_SIZE, + "unknown error: <%s>(%d)", + ctx->errbuf, ctx->rc); + break; + case GRN_OPERATION_NOT_PERMITTED: + error_class = mrb_class_get_under(mrb, module, "OperationNotPermitted"); + snprintf(message, MESSAGE_SIZE, + "operation not permitted: <%s>(%d)", + ctx->errbuf, ctx->rc); + break; + case GRN_NO_SUCH_FILE_OR_DIRECTORY: + error_class = mrb_class_get_under(mrb, module, "NoSuchFileOrDirectory"); + snprintf(message, MESSAGE_SIZE, + "no such file or directory: <%s>(%d)", + ctx->errbuf, ctx->rc); + break; + case GRN_NO_SUCH_PROCESS: + error_class = mrb_class_get_under(mrb, module, "NoSuchProcess"); + snprintf(message, MESSAGE_SIZE, + "no such process: <%s>(%d)", + ctx->errbuf, ctx->rc); + break; + case GRN_INTERRUPTED_FUNCTION_CALL: + error_class = mrb_class_get_under(mrb, module, "InterruptedFunctionCall"); + snprintf(message, MESSAGE_SIZE, + "interrupted function call: <%s>(%d)", + ctx->errbuf, ctx->rc); + break; + case GRN_INPUT_OUTPUT_ERROR: + error_class = mrb_class_get_under(mrb, module, "InputOutputError"); + snprintf(message, MESSAGE_SIZE, + 
"input output error: <%s>(%d)", + ctx->errbuf, ctx->rc); + break; + case GRN_NO_SUCH_DEVICE_OR_ADDRESS: + error_class = mrb_class_get_under(mrb, module, "NoSuchDeviceOrAddress"); + snprintf(message, MESSAGE_SIZE, + "no such device or address: <%s>(%d)", + ctx->errbuf, ctx->rc); + break; + case GRN_ARG_LIST_TOO_LONG: + error_class = mrb_class_get_under(mrb, module, "ArgListTooLong"); + snprintf(message, MESSAGE_SIZE, + "arg list too long: <%s>(%d)", + ctx->errbuf, ctx->rc); + break; + case GRN_EXEC_FORMAT_ERROR: + error_class = mrb_class_get_under(mrb, module, "ExecFormatError"); + snprintf(message, MESSAGE_SIZE, + "exec format error: <%s>(%d)", + ctx->errbuf, ctx->rc); + break; + case GRN_BAD_FILE_DESCRIPTOR: + error_class = mrb_class_get_under(mrb, module, "BadFileDescriptor"); + snprintf(message, MESSAGE_SIZE, + "bad file descriptor: <%s>(%d)", + ctx->errbuf, ctx->rc); + break; + case GRN_NO_CHILD_PROCESSES: + error_class = mrb_class_get_under(mrb, module, "NoChildProcesses"); + snprintf(message, MESSAGE_SIZE, + "no child processes: <%s>(%d)", + ctx->errbuf, ctx->rc); + break; + case GRN_RESOURCE_TEMPORARILY_UNAVAILABLE: + error_class = mrb_class_get_under(mrb, module, + "ResourceTemporarilyUnavailable"); + snprintf(message, MESSAGE_SIZE, + "resource temporarily unavailable: <%s>(%d)", + ctx->errbuf, ctx->rc); + break; + case GRN_NOT_ENOUGH_SPACE: + error_class = mrb_class_get_under(mrb, module, "NotEnoughSpace"); + snprintf(message, MESSAGE_SIZE, + "not enough space: <%s>(%d)", + ctx->errbuf, ctx->rc); + break; + case GRN_PERMISSION_DENIED: + error_class = mrb_class_get_under(mrb, module, "PermissionDenied"); + snprintf(message, MESSAGE_SIZE, + "permission denied: <%s>(%d)", + ctx->errbuf, ctx->rc); + break; + case GRN_BAD_ADDRESS: + error_class = mrb_class_get_under(mrb, module, "BadAddress"); + snprintf(message, MESSAGE_SIZE, + "bad address: <%s>(%d)", + ctx->errbuf, ctx->rc); + break; + case GRN_RESOURCE_BUSY: + error_class = mrb_class_get_under(mrb, module, 
"ResourceBusy"); + snprintf(message, MESSAGE_SIZE, + "resource busy: <%s>(%d)", + ctx->errbuf, ctx->rc); + break; + case GRN_FILE_EXISTS: + error_class = mrb_class_get_under(mrb, module, "FileExists"); + snprintf(message, MESSAGE_SIZE, + "file exists: <%s>(%d)", + ctx->errbuf, ctx->rc); + break; + case GRN_IMPROPER_LINK: + error_class = mrb_class_get_under(mrb, module, "ImproperLink"); + snprintf(message, MESSAGE_SIZE, + "improper link: <%s>(%d)", + ctx->errbuf, ctx->rc); + break; + case GRN_NO_SUCH_DEVICE: + error_class = mrb_class_get_under(mrb, module, "NoSuchDevice"); + snprintf(message, MESSAGE_SIZE, + "no such device: <%s>(%d)", + ctx->errbuf, ctx->rc); + break; + case GRN_NOT_A_DIRECTORY: + error_class = mrb_class_get_under(mrb, module, "NotDirectory"); + snprintf(message, MESSAGE_SIZE, + "not directory: <%s>(%d)", + ctx->errbuf, ctx->rc); + break; + case GRN_IS_A_DIRECTORY: + error_class = mrb_class_get_under(mrb, module, "IsDirectory"); + snprintf(message, MESSAGE_SIZE, + "is directory: <%s>(%d)", + ctx->errbuf, ctx->rc); + break; + case GRN_INVALID_ARGUMENT: + error_class = mrb_class_get_under(mrb, module, "InvalidArgument"); + snprintf(message, MESSAGE_SIZE, + "invalid argument: <%s>(%d)", + ctx->errbuf, ctx->rc); + break; + case GRN_TOO_MANY_OPEN_FILES_IN_SYSTEM: + error_class = mrb_class_get_under(mrb, module, "TooManyOpenFilesInSystem"); + snprintf(message, MESSAGE_SIZE, + "too many open files in system: <%s>(%d)", + ctx->errbuf, ctx->rc); + break; + case GRN_TOO_MANY_OPEN_FILES: + error_class = mrb_class_get_under(mrb, module, "TooManyOpenFiles"); + snprintf(message, MESSAGE_SIZE, + "too many open files: <%s>(%d)", + ctx->errbuf, ctx->rc); + break; + case GRN_INAPPROPRIATE_I_O_CONTROL_OPERATION: + error_class = mrb_class_get_under(mrb, module, + "InappropriateIOControlOperation"); + snprintf(message, MESSAGE_SIZE, + "inappropriate IO control operation: <%s>(%d)", + ctx->errbuf, ctx->rc); + break; + case GRN_FILE_TOO_LARGE: + error_class = 
mrb_class_get_under(mrb, module, "FileTooLarge"); + snprintf(message, MESSAGE_SIZE, + "file too large: <%s>(%d)", + ctx->errbuf, ctx->rc); + break; + case GRN_NO_SPACE_LEFT_ON_DEVICE: + error_class = mrb_class_get_under(mrb, module, "NoSpaceLeftOnDevice"); + snprintf(message, MESSAGE_SIZE, + "no space left on device: <%s>(%d)", + ctx->errbuf, ctx->rc); + break; + case GRN_INVALID_SEEK: + error_class = mrb_class_get_under(mrb, module, "InvalidSeek"); + snprintf(message, MESSAGE_SIZE, + "invalid seek: <%s>(%d)", + ctx->errbuf, ctx->rc); + break; + case GRN_READ_ONLY_FILE_SYSTEM: + error_class = mrb_class_get_under(mrb, module, "ReadOnlyFileSystem"); + snprintf(message, MESSAGE_SIZE, + "read only file system: <%s>(%d)", + ctx->errbuf, ctx->rc); + break; + case GRN_TOO_MANY_LINKS: + error_class = mrb_class_get_under(mrb, module, "TooManyLinks"); + snprintf(message, MESSAGE_SIZE, + "too many links: <%s>(%d)", + ctx->errbuf, ctx->rc); + break; + case GRN_BROKEN_PIPE: + error_class = mrb_class_get_under(mrb, module, "BrokenPipe"); + snprintf(message, MESSAGE_SIZE, + "broken pipe: <%s>(%d)", + ctx->errbuf, ctx->rc); + break; + case GRN_DOMAIN_ERROR: + error_class = mrb_class_get_under(mrb, module, "DomainError"); + snprintf(message, MESSAGE_SIZE, + "domain error: <%s>(%d)", + ctx->errbuf, ctx->rc); + break; + case GRN_RESULT_TOO_LARGE: + error_class = mrb_class_get_under(mrb, module, "ResultTooLarge"); + snprintf(message, MESSAGE_SIZE, + "result too large: <%s>(%d)", + ctx->errbuf, ctx->rc); + break; + case GRN_RESOURCE_DEADLOCK_AVOIDED: + error_class = mrb_class_get_under(mrb, module, "ResourceDeadlockAvoided"); + snprintf(message, MESSAGE_SIZE, + "resource deadlock avoided: <%s>(%d)", + ctx->errbuf, ctx->rc); + break; + case GRN_NO_MEMORY_AVAILABLE: + error_class = mrb_class_get_under(mrb, module, "NoMemoryAvailable"); + snprintf(message, MESSAGE_SIZE, + "no memory available: <%s>(%d)", + ctx->errbuf, ctx->rc); + break; + case GRN_FILENAME_TOO_LONG: + error_class = 
mrb_class_get_under(mrb, module, "FilenameTooLong"); + snprintf(message, MESSAGE_SIZE, + "filename too long: <%s>(%d)", + ctx->errbuf, ctx->rc); + break; + case GRN_NO_LOCKS_AVAILABLE: + error_class = mrb_class_get_under(mrb, module, "NoLocksAvailable"); + snprintf(message, MESSAGE_SIZE, + "no locks available: <%s>(%d)", + ctx->errbuf, ctx->rc); + break; + case GRN_FUNCTION_NOT_IMPLEMENTED: + error_class = mrb_class_get_under(mrb, module, "FunctionNotImplemented"); + snprintf(message, MESSAGE_SIZE, + "function not implemented: <%s>(%d)", + ctx->errbuf, ctx->rc); + break; + case GRN_DIRECTORY_NOT_EMPTY: + error_class = mrb_class_get_under(mrb, module, "DirectoryNotEmpty"); + snprintf(message, MESSAGE_SIZE, + "directory not empty: <%s>(%d)", + ctx->errbuf, ctx->rc); + break; + case GRN_ILLEGAL_BYTE_SEQUENCE: + error_class = mrb_class_get_under(mrb, module, "IllegalByteSequence"); + snprintf(message, MESSAGE_SIZE, + "illegal byte sequence: <%s>(%d)", + ctx->errbuf, ctx->rc); + break; + case GRN_SOCKET_NOT_INITIALIZED: + error_class = mrb_class_get_under(mrb, module, "SocketNotInitialized"); + snprintf(message, MESSAGE_SIZE, + "socket not initialized: <%s>(%d)", + ctx->errbuf, ctx->rc); + break; + case GRN_OPERATION_WOULD_BLOCK: + error_class = mrb_class_get_under(mrb, module, "OperationWouldBlock"); + snprintf(message, MESSAGE_SIZE, + "operation would block: <%s>(%d)", + ctx->errbuf, ctx->rc); + break; + case GRN_ADDRESS_IS_NOT_AVAILABLE: + error_class = mrb_class_get_under(mrb, module, "AddressIsNotAvailable"); + snprintf(message, MESSAGE_SIZE, + "address is not available: <%s>(%d)", + ctx->errbuf, ctx->rc); + break; + case GRN_NETWORK_IS_DOWN: + error_class = mrb_class_get_under(mrb, module, "NetworkIsDown"); + snprintf(message, MESSAGE_SIZE, + "network is down: <%s>(%d)", + ctx->errbuf, ctx->rc); + break; + case GRN_NO_BUFFER: + error_class = mrb_class_get_under(mrb, module, "NoBuffer"); + snprintf(message, MESSAGE_SIZE, + "no buffer: <%s>(%d)", + ctx->errbuf, 
ctx->rc); + break; + case GRN_SOCKET_IS_ALREADY_CONNECTED: + error_class = mrb_class_get_under(mrb, module, "SocketIsAlreadyConnected"); + snprintf(message, MESSAGE_SIZE, + "socket is already connected: <%s>(%d)", + ctx->errbuf, ctx->rc); + break; + case GRN_SOCKET_IS_NOT_CONNECTED: + error_class = mrb_class_get_under(mrb, module, "SocketIsNotConnected"); + snprintf(message, MESSAGE_SIZE, + "socket is not connected: <%s>(%d)", + ctx->errbuf, ctx->rc); + break; + case GRN_SOCKET_IS_ALREADY_SHUTDOWNED: + error_class = mrb_class_get_under(mrb, module, "SocketIsAlreadyShutdowned"); + snprintf(message, MESSAGE_SIZE, + "socket is already shutdowned: <%s>(%d)", + ctx->errbuf, ctx->rc); + break; + case GRN_OPERATION_TIMEOUT: + error_class = mrb_class_get_under(mrb, module, "OperationTimeout"); + snprintf(message, MESSAGE_SIZE, + "operation timeout: <%s>(%d)", + ctx->errbuf, ctx->rc); + break; + case GRN_CONNECTION_REFUSED: + error_class = mrb_class_get_under(mrb, module, "ConnectionRefused"); + snprintf(message, MESSAGE_SIZE, + "connection refused: <%s>(%d)", + ctx->errbuf, ctx->rc); + break; + case GRN_RANGE_ERROR: + error_class = mrb_class_get_under(mrb, module, "RangeError"); + snprintf(message, MESSAGE_SIZE, + "range error: <%s>(%d)", + ctx->errbuf, ctx->rc); + break; + case GRN_TOKENIZER_ERROR: + error_class = mrb_class_get_under(mrb, module, "TokenizerError"); + snprintf(message, MESSAGE_SIZE, + "tokenizer error: <%s>(%d)", + ctx->errbuf, ctx->rc); + break; + case GRN_FILE_CORRUPT: + error_class = mrb_class_get_under(mrb, module, "FileCorrupt"); + snprintf(message, MESSAGE_SIZE, + "file corrupt: <%s>(%d)", + ctx->errbuf, ctx->rc); + break; + case GRN_INVALID_FORMAT: + error_class = mrb_class_get_under(mrb, module, "InvalidFormat"); + snprintf(message, MESSAGE_SIZE, + "invalid format: <%s>(%d)", + ctx->errbuf, ctx->rc); + break; + case GRN_OBJECT_CORRUPT: + error_class = mrb_class_get_under(mrb, module, "ObjectCorrupt"); + snprintf(message, MESSAGE_SIZE, + "object 
corrupt: <%s>(%d)", + ctx->errbuf, ctx->rc); + break; + case GRN_TOO_MANY_SYMBOLIC_LINKS: + error_class = mrb_class_get_under(mrb, module, "TooManySymbolicLinks"); + snprintf(message, MESSAGE_SIZE, + "too many symbolic links: <%s>(%d)", + ctx->errbuf, ctx->rc); + break; + case GRN_NOT_SOCKET: + error_class = mrb_class_get_under(mrb, module, "NotSocket"); + snprintf(message, MESSAGE_SIZE, + "not socket: <%s>(%d)", + ctx->errbuf, ctx->rc); + break; + case GRN_OPERATION_NOT_SUPPORTED: + error_class = mrb_class_get_under(mrb, module, "OperationNotSupported"); + snprintf(message, MESSAGE_SIZE, + "operation not supported: <%s>(%d)", + ctx->errbuf, ctx->rc); + break; + case GRN_ADDRESS_IS_IN_USE: + error_class = mrb_class_get_under(mrb, module, "AddressIsInUse"); + snprintf(message, MESSAGE_SIZE, + "address is in use: <%s>(%d)", + ctx->errbuf, ctx->rc); + break; + case GRN_ZLIB_ERROR: + error_class = mrb_class_get_under(mrb, module, "ZlibError"); + snprintf(message, MESSAGE_SIZE, + "zlib error: <%s>(%d)", + ctx->errbuf, ctx->rc); + break; + case GRN_LZO_ERROR: + error_class = mrb_class_get_under(mrb, module, "LzoError"); + snprintf(message, MESSAGE_SIZE, + "LZO error: <%s>(%d)", + ctx->errbuf, ctx->rc); + break; + case GRN_STACK_OVER_FLOW: + error_class = mrb_class_get_under(mrb, module, "StackOverFlow"); + snprintf(message, MESSAGE_SIZE, + "stack over flow: <%s>(%d)", + ctx->errbuf, ctx->rc); + break; + case GRN_SYNTAX_ERROR: + error_class = mrb_class_get_under(mrb, module, "SyntaxError"); + snprintf(message, MESSAGE_SIZE, + "syntax error: <%s>(%d)", + ctx->errbuf, ctx->rc); + break; + case GRN_RETRY_MAX: + error_class = mrb_class_get_under(mrb, module, "RetryMax"); + snprintf(message, MESSAGE_SIZE, + "retry max: <%s>(%d)", + ctx->errbuf, ctx->rc); + break; + case GRN_INCOMPATIBLE_FILE_FORMAT: + error_class = mrb_class_get_under(mrb, module, "IncompatibleFileFormat"); + snprintf(message, MESSAGE_SIZE, + "incompatible file format: <%s>(%d)", + ctx->errbuf, ctx->rc); + 
break; + case GRN_UPDATE_NOT_ALLOWED: + error_class = mrb_class_get_under(mrb, module, "UpdateNotAllowed"); + snprintf(message, MESSAGE_SIZE, + "update not allowed: <%s>(%d)", + ctx->errbuf, ctx->rc); + break; + case GRN_TOO_SMALL_OFFSET: + error_class = mrb_class_get_under(mrb, module, "TooSmallOffset"); + snprintf(message, MESSAGE_SIZE, + "too small offset: <%s>(%d)", + ctx->errbuf, ctx->rc); + break; + case GRN_TOO_LARGE_OFFSET: + error_class = mrb_class_get_under(mrb, module, "TooLargeOffset"); + snprintf(message, MESSAGE_SIZE, + "too large offset: <%s>(%d)", + ctx->errbuf, ctx->rc); + break; + case GRN_TOO_SMALL_LIMIT: + error_class = mrb_class_get_under(mrb, module, "TooSmallLimit"); + snprintf(message, MESSAGE_SIZE, + "too small limit: <%s>(%d)", + ctx->errbuf, ctx->rc); + break; + case GRN_CAS_ERROR: + error_class = mrb_class_get_under(mrb, module, "CASError"); + snprintf(message, MESSAGE_SIZE, + "CAS error: <%s>(%d)", + ctx->errbuf, ctx->rc); + break; + case GRN_UNSUPPORTED_COMMAND_VERSION: + error_class = mrb_class_get_under(mrb, module, "UnsupportedCommandVersion"); + snprintf(message, MESSAGE_SIZE, + "unsupported command version: <%s>(%d)", + ctx->errbuf, ctx->rc); + break; + case GRN_NORMALIZER_ERROR: + error_class = mrb_class_get_under(mrb, module, "NormalizerError"); + snprintf(message, MESSAGE_SIZE, + "normalizer error: <%s>(%d)", + ctx->errbuf, ctx->rc); + break; + default: + error_class = mrb_class_get_under(mrb, module, "Error"); + snprintf(message, MESSAGE_SIZE, + "unsupported error: <%s>(%d)", + ctx->errbuf, ctx->rc); + break; + } +#undef MESSAGE_SIZE + + mrb_raise(mrb, error_class, message); +} + void grn_mrb_ctx_init(grn_ctx *ctx) { diff --git a/storage/mroonga/vendor/groonga/lib/mrb/mrb_ctx.h b/storage/mroonga/vendor/groonga/lib/mrb/mrb_ctx.h index f620f941c93..5068d0d75e2 100644 --- a/storage/mroonga/vendor/groonga/lib/mrb/mrb_ctx.h +++ b/storage/mroonga/vendor/groonga/lib/mrb/mrb_ctx.h @@ -26,6 +26,9 @@ extern "C" { #endif void 
grn_mrb_ctx_init(grn_ctx *ctx); +#ifdef GRN_WITH_MRUBY +void grn_mrb_ctx_check(mrb_state *mrb); +#endif #ifdef __cplusplus } diff --git a/storage/mroonga/vendor/groonga/lib/mrb/mrb_error.c b/storage/mroonga/vendor/groonga/lib/mrb/mrb_error.c new file mode 100644 index 00000000000..2f45cfc736e --- /dev/null +++ b/storage/mroonga/vendor/groonga/lib/mrb/mrb_error.c @@ -0,0 +1,185 @@ +/* -*- c-basic-offset: 2 -*- */ +/* + Copyright(C) 2014 Brazil + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License version 2.1 as published by the Free Software Foundation. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +*/ + +#include "../ctx_impl.h" + +#ifdef GRN_WITH_MRUBY +#include + +#include "../mrb.h" +#include "mrb_error.h" + +void +grn_mrb_error_init(grn_ctx *ctx) +{ + grn_mrb_data *data = &(ctx->impl->mrb); + mrb_state *mrb = data->state; + struct RClass *module = data->module; + struct RClass *error_class; + + error_class = mrb_define_class_under(mrb, module, "Error", + mrb->eStandardError_class); + + mrb_define_class_under(mrb, module, "EndOfData", + error_class); + mrb_define_class_under(mrb, module, "UnknownError", + error_class); + mrb_define_class_under(mrb, module, "OperationNotPermitted", + error_class); + mrb_define_class_under(mrb, module, "NoSuchFileOrDirectory", + error_class); + mrb_define_class_under(mrb, module, "NoSuchProcess", + error_class); + mrb_define_class_under(mrb, module, "InterruptedFunctionCall", + error_class); + mrb_define_class_under(mrb, module, 
"InputOutputError", + error_class); + mrb_define_class_under(mrb, module, "NoSuchDeviceOrAddress", + error_class); + mrb_define_class_under(mrb, module, "ArgListTooLong", + error_class); + mrb_define_class_under(mrb, module, "ExecFormatError", + error_class); + mrb_define_class_under(mrb, module, "BadFileDescriptor", + error_class); + mrb_define_class_under(mrb, module, "NoChildProcesses", + error_class); + mrb_define_class_under(mrb, module, "ResourceTemporarilyUnavailable", + error_class); + mrb_define_class_under(mrb, module, "NotEnoughSpace", + error_class); + mrb_define_class_under(mrb, module, "PermissionDenied", + error_class); + mrb_define_class_under(mrb, module, "BadAddress", + error_class); + mrb_define_class_under(mrb, module, "ResourceBusy", + error_class); + mrb_define_class_under(mrb, module, "FileExists", + error_class); + mrb_define_class_under(mrb, module, "ImproperLink", + error_class); + mrb_define_class_under(mrb, module, "NoSuchDevice", + error_class); + mrb_define_class_under(mrb, module, "NotDirectory", + error_class); + mrb_define_class_under(mrb, module, "IsDirectory", + error_class); + mrb_define_class_under(mrb, module, "InvalidArgument", + error_class); + mrb_define_class_under(mrb, module, "TooManyOpenFilesInSystem", + error_class); + mrb_define_class_under(mrb, module, "TooManyOpenFiles", + error_class); + mrb_define_class_under(mrb, module, "InappropriateIOControlOperation", + error_class); + mrb_define_class_under(mrb, module, "FileTooLarge", + error_class); + mrb_define_class_under(mrb, module, "NoSpaceLeftOnDevice", + error_class); + mrb_define_class_under(mrb, module, "InvalidSeek", + error_class); + mrb_define_class_under(mrb, module, "ReadOnlyFileSystem", + error_class); + mrb_define_class_under(mrb, module, "TooManyLinks", + error_class); + mrb_define_class_under(mrb, module, "BrokenPipe", + error_class); + mrb_define_class_under(mrb, module, "DomainError", + error_class); + mrb_define_class_under(mrb, module, 
"ResultTooLarge", + error_class); + mrb_define_class_under(mrb, module, "ResourceDeadlockAvoided", + error_class); + mrb_define_class_under(mrb, module, "NoMemoryAvailable", + error_class); + mrb_define_class_under(mrb, module, "FilenameTooLong", + error_class); + mrb_define_class_under(mrb, module, "NoLocksAvailable", + error_class); + mrb_define_class_under(mrb, module, "FunctionNotImplemented", + error_class); + mrb_define_class_under(mrb, module, "DirectoryNotEmpty", + error_class); + mrb_define_class_under(mrb, module, "IllegalByteSequence", + error_class); + mrb_define_class_under(mrb, module, "SocketNotInitialized", + error_class); + mrb_define_class_under(mrb, module, "OperationWouldBlock", + error_class); + mrb_define_class_under(mrb, module, "AddressIsNotAvailable", + error_class); + mrb_define_class_under(mrb, module, "NetworkIsDown", + error_class); + mrb_define_class_under(mrb, module, "NoBuffer", + error_class); + mrb_define_class_under(mrb, module, "SocketIsAlreadyConnected", + error_class); + mrb_define_class_under(mrb, module, "SocketIsNotConnected", + error_class); + mrb_define_class_under(mrb, module, "SocketIsAlreadyShutdowned", + error_class); + mrb_define_class_under(mrb, module, "OperationTimeout", + error_class); + mrb_define_class_under(mrb, module, "ConnectionRefused", + error_class); + mrb_define_class_under(mrb, module, "RangeError", + error_class); + mrb_define_class_under(mrb, module, "TokenizerError", + error_class); + mrb_define_class_under(mrb, module, "FileCorrupt", + error_class); + mrb_define_class_under(mrb, module, "InvalidFormat", + error_class); + mrb_define_class_under(mrb, module, "ObjectCorrupt", + error_class); + mrb_define_class_under(mrb, module, "TooManySymbolicLinks", + error_class); + mrb_define_class_under(mrb, module, "NotSocket", + error_class); + mrb_define_class_under(mrb, module, "OperationNotSupported", + error_class); + mrb_define_class_under(mrb, module, "AddressIsInUse", + error_class); + 
mrb_define_class_under(mrb, module, "ZlibError", + error_class); + mrb_define_class_under(mrb, module, "LzoError", + error_class); + mrb_define_class_under(mrb, module, "StackOverFlow", + error_class); + mrb_define_class_under(mrb, module, "SyntaxError", + error_class); + mrb_define_class_under(mrb, module, "RetryMax", + error_class); + mrb_define_class_under(mrb, module, "IncompatibleFileFormat", + error_class); + mrb_define_class_under(mrb, module, "UpdateNotAllowed", + error_class); + mrb_define_class_under(mrb, module, "TooSmallOffset", + error_class); + mrb_define_class_under(mrb, module, "TooLargeOffset", + error_class); + mrb_define_class_under(mrb, module, "TooSmallLimit", + error_class); + mrb_define_class_under(mrb, module, "CASError", + error_class); + mrb_define_class_under(mrb, module, "UnsupportedCommandVersion", + error_class); + mrb_define_class_under(mrb, module, "NormalizerError", + error_class); +} +#endif diff --git a/storage/mroonga/vendor/groonga/lib/mrb/mrb_error.h b/storage/mroonga/vendor/groonga/lib/mrb/mrb_error.h new file mode 100644 index 00000000000..c59fabc12af --- /dev/null +++ b/storage/mroonga/vendor/groonga/lib/mrb/mrb_error.h @@ -0,0 +1,34 @@ +/* -*- c-basic-offset: 2 -*- */ +/* + Copyright(C) 2014 Brazil + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License version 2.1 as published by the Free Software Foundation. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +*/ + +#ifndef GRN_MRB_ERROR_H +#define GRN_MRB_ERROR_H + +#include "../ctx.h" + +#ifdef __cplusplus +extern "C" { +#endif + +void grn_mrb_error_init(grn_ctx *ctx); + +#ifdef __cplusplus +} +#endif + +#endif /* GRN_MRB_ERROR_H */ diff --git a/storage/mroonga/vendor/groonga/lib/mrb/mrb_expr.c b/storage/mroonga/vendor/groonga/lib/mrb/mrb_expr.c index b9fb35020cb..3d0499c6b5c 100644 --- a/storage/mroonga/vendor/groonga/lib/mrb/mrb_expr.c +++ b/storage/mroonga/vendor/groonga/lib/mrb/mrb_expr.c @@ -23,12 +23,14 @@ #include #include #include +#include #include #include "../expr.h" #include "../util.h" #include "../mrb.h" #include "mrb_accessor.h" +#include "mrb_ctx.h" #include "mrb_expr.h" #include "mrb_converter.h" @@ -230,6 +232,29 @@ mrb_grn_scan_info_get_max_interval(mrb_state *mrb, mrb_value self) return mrb_fixnum_value(max_interval); } +static mrb_value +mrb_grn_scan_info_set_similarity_threshold(mrb_state *mrb, mrb_value self) +{ + scan_info *si; + int similarity_threshold; + + mrb_get_args(mrb, "i", &similarity_threshold); + si = DATA_PTR(self); + grn_scan_info_set_similarity_threshold(si, similarity_threshold); + return self; +} + +static mrb_value +mrb_grn_scan_info_get_similarity_threshold(mrb_state *mrb, mrb_value self) +{ + scan_info *si; + int similarity_threshold; + + si = DATA_PTR(self); + similarity_threshold = grn_scan_info_get_similarity_threshold(si); + return mrb_fixnum_value(similarity_threshold); +} + static mrb_value mrb_grn_scan_info_get_arg(mrb_state *mrb, mrb_value self) { @@ -339,6 +364,51 @@ mrb_grn_expression_get_var_by_offset(mrb_state *mrb, mrb_value self) return grn_mrb_value_from_grn_obj(mrb, var); } +static mrb_value +mrb_grn_expression_take_object(mrb_state *mrb, mrb_value self) +{ + grn_ctx *ctx = (grn_ctx *)mrb->ud; + 
grn_obj *expr; + mrb_value mrb_object; + grn_obj *grn_object; + + mrb_get_args(mrb, "o", &mrb_object); + expr = DATA_PTR(self); + grn_object = DATA_PTR(mrb_object); + grn_expr_take_obj(ctx, expr, grn_object); + + return mrb_object; +} + +static mrb_value +mrb_grn_expression_allocate_constant(mrb_state *mrb, mrb_value self) +{ + grn_ctx *ctx = (grn_ctx *)mrb->ud; + grn_obj *expr; + mrb_value mrb_object; + grn_obj *grn_object; + + mrb_get_args(mrb, "o", &mrb_object); + expr = DATA_PTR(self); + + switch (mrb_type(mrb_object)) { + case MRB_TT_STRING: + grn_object = grn_expr_alloc_const(ctx, expr); + if (!grn_object) { + grn_mrb_ctx_check(mrb); + } + GRN_TEXT_INIT(grn_object, 0); + GRN_TEXT_SET(ctx, grn_object, + RSTRING_PTR(mrb_object), RSTRING_LEN(mrb_object)); + break; + default: + mrb_raisef(mrb, E_ARGUMENT_ERROR, "unsupported type: %S", mrb_object); + break; + } + + return grn_mrb_value_from_grn_obj(mrb, grn_object); +} + void grn_mrb_expr_init(grn_ctx *ctx) { @@ -373,6 +443,10 @@ grn_mrb_expr_init(grn_ctx *ctx) mrb_grn_scan_info_get_max_interval, MRB_ARGS_NONE()); mrb_define_method(mrb, klass, "max_interval=", mrb_grn_scan_info_set_max_interval, MRB_ARGS_REQ(1)); + mrb_define_method(mrb, klass, "similarity_threshold", + mrb_grn_scan_info_get_similarity_threshold, MRB_ARGS_NONE()); + mrb_define_method(mrb, klass, "similarity_threshold=", + mrb_grn_scan_info_set_similarity_threshold, MRB_ARGS_REQ(1)); mrb_define_method(mrb, klass, "get_arg", mrb_grn_scan_info_get_arg, MRB_ARGS_REQ(1)); mrb_define_method(mrb, klass, "push_arg", @@ -400,6 +474,10 @@ grn_mrb_expr_init(grn_ctx *ctx) mrb_grn_expression_codes, MRB_ARGS_NONE()); mrb_define_method(mrb, klass, "get_var_by_offset", mrb_grn_expression_get_var_by_offset, MRB_ARGS_REQ(1)); + mrb_define_method(mrb, klass, "take_object", + mrb_grn_expression_take_object, MRB_ARGS_REQ(1)); + mrb_define_method(mrb, klass, "allocate_constant", + mrb_grn_expression_allocate_constant, MRB_ARGS_REQ(1)); grn_mrb_load(ctx, 
"expression.rb"); grn_mrb_load(ctx, "scan_info.rb"); diff --git a/storage/mroonga/vendor/groonga/lib/mrb/mrb_obj.c b/storage/mroonga/vendor/groonga/lib/mrb/mrb_obj.c index de3bf678c00..142a242b980 100644 --- a/storage/mroonga/vendor/groonga/lib/mrb/mrb_obj.c +++ b/storage/mroonga/vendor/groonga/lib/mrb/mrb_obj.c @@ -91,6 +91,26 @@ object_grn_inspect(mrb_state *mrb, mrb_value self) return inspected; } +static mrb_value +object_equal(mrb_state *mrb, mrb_value self) +{ + grn_obj *object, *other_object; + mrb_value mrb_other; + + mrb_get_args(mrb, "o", &mrb_other); + if (!mrb_obj_is_kind_of(mrb, mrb_other, mrb_obj_class(mrb, self))) { + return mrb_false_value(); + } + + object = DATA_PTR(self); + other_object = DATA_PTR(mrb_other); + if (object == other_object) { + return mrb_true_value(); + } else { + return mrb_false_value(); + } +} + void grn_mrb_obj_init(grn_ctx *ctx) { @@ -108,6 +128,7 @@ grn_mrb_obj_init(grn_ctx *ctx) object_find_index, MRB_ARGS_REQ(1)); mrb_define_method(mrb, klass, "grn_inspect", object_grn_inspect, MRB_ARGS_NONE()); + mrb_define_method(mrb, klass, "==", object_equal, MRB_ARGS_REQ(1)); grn_mrb_load(ctx, "index_info.rb"); } diff --git a/storage/mroonga/vendor/groonga/lib/mrb/scripts/scan_info.rb b/storage/mroonga/vendor/groonga/lib/mrb/scripts/scan_info.rb index 9977d9bd8d7..cf0056d7fd3 100644 --- a/storage/mroonga/vendor/groonga/lib/mrb/scripts/scan_info.rb +++ b/storage/mroonga/vendor/groonga/lib/mrb/scripts/scan_info.rb @@ -16,6 +16,9 @@ module Groonga if data.max_interval self.max_interval = data.max_interval end + if data.similarity_threshold + self.similarity_threshold = data.similarity_threshold + end data.args.each do |arg| push_arg(arg) end diff --git a/storage/mroonga/vendor/groonga/lib/mrb/scripts/scan_info_builder.rb b/storage/mroonga/vendor/groonga/lib/mrb/scripts/scan_info_builder.rb index 0d88f4027aa..5e258e90e5a 100644 --- a/storage/mroonga/vendor/groonga/lib/mrb/scripts/scan_info_builder.rb +++ 
b/storage/mroonga/vendor/groonga/lib/mrb/scripts/scan_info_builder.rb @@ -123,7 +123,7 @@ module Groonga put_logical_op(@operator, n_codes) end - @data_list + optimize end private @@ -239,5 +239,109 @@ module Groonga end end end + + def optimize + optimized_data_list = [] + i = 0 + n = @data_list.size + while i < n + data = @data_list[i] + next_data = @data_list[i + 1] + i += 1 + if next_data.nil? + optimized_data_list << data + next + end + if range_operations?(data, next_data) + between_data = create_between_data(data, next_data) + optimized_data_list << between_data + i += 1 + next + end + optimized_data_list << data + end + optimized_data_list + end + + def range_operations?(data, next_data) + return false unless next_data.logical_op == Operator::AND + + op, next_op = data.op, next_data.op + return false if !(lower_condition?(op) or lower_condition?(next_op)) + return false if !(upper_condition?(op) or upper_condition?(next_op)) + + return false if data.args[0] != next_data.args[0] + + data_indexes = data.indexes + return false if data_indexes.empty? 
+ + data_indexes == next_data.indexes + end + + def lower_condition?(operator) + case operator + when Operator::GREATER, Operator::GREATER_EQUAL + true + else + false + end + end + + def upper_condition?(operator) + case operator + when Operator::LESS, Operator::LESS_EQUAL + true + else + false + end + end + + def create_between_data(data, next_data) + between_data = ScanInfoData.new(data.start) + between_data.end = next_data.end + 1 + between_data.flags = data.flags + between_data.op = Operator::CALL + between_data.logical_op = data.logical_op + between_data.args = create_between_data_args(data, next_data) + between_data.indexes = data.indexes + between_data + end + + def create_between_data_args(data, next_data) + between = Context.instance["between"] + @expression.take_object(between) + column = data.args[0] + op, next_op = data.op, next_data.op + if lower_condition?(op) + min = data.args[1] + min_operator = op + max = next_data.args[1] + max_operator = next_op + else + min = next_data.args[1] + min_operator = next_op + max = data.args[1] + max_operator = op + end + if min_operator == Operator::GREATER + min_border = "exclude" + else + min_border = "include" + end + if max_operator == Operator::LESS + max_border = "exclude" + else + max_border = "include" + end + + [ + between, + column, + min, + @expression.allocate_constant(min_border), + max, + @expression.allocate_constant(max_border), + ] + end end end diff --git a/storage/mroonga/vendor/groonga/lib/mrb/scripts/scan_info_data.rb b/storage/mroonga/vendor/groonga/lib/mrb/scripts/scan_info_data.rb index 87eb9ed4937..67d0bc3aef7 100644 --- a/storage/mroonga/vendor/groonga/lib/mrb/scripts/scan_info_data.rb +++ b/storage/mroonga/vendor/groonga/lib/mrb/scripts/scan_info_data.rb @@ -9,6 +9,7 @@ module Groonga attr_accessor :indexes attr_accessor :flags attr_accessor :max_interval + attr_accessor :similarity_threshold def initialize(start) @start = start @end = 0 @@ -19,11 +20,14 @@ module Groonga @indexes = [] 
@flags = ScanInfo::Flags::PUSH @max_interval = nil + @similarity_threshold = nil end def match_resolve_index if near_search? match_near_resolve_index + elsif similar_search? + match_similar_resolve_index else match_generic_resolve_index end @@ -60,6 +64,29 @@ module Groonga self.max_interval = @args[2].value end + def similar_search? + @op == Operator::SIMILAR and @args.size == 3 + end + + def match_similar_resolve_index + arg = @args[0] + case arg + when Expression + match_resolve_index_expression(arg) + when Accessor + match_resolve_index_accessor(arg) + when Object + match_resolve_index_db_obj(arg) + else + message = + "The first argument of SIMILAR must be Expression, Accessor or Object: #{arg.class}" + raise message + end + + self.query = @args[1] + self.similarity_threshold = @args[2].value + end + def match_generic_resolve_index @args.each do |arg| case arg diff --git a/storage/mroonga/vendor/groonga/lib/mrb/sources.am b/storage/mroonga/vendor/groonga/lib/mrb/sources.am index 858b2205d4f..947607c7ecd 100644 --- a/storage/mroonga/vendor/groonga/lib/mrb/sources.am +++ b/storage/mroonga/vendor/groonga/lib/mrb/sources.am @@ -9,6 +9,8 @@ libgrnmrb_la_SOURCES = \ mrb_converter.h \ mrb_ctx.c \ mrb_ctx.h \ + mrb_error.c \ + mrb_error.h \ mrb_expr.c \ mrb_expr.h \ mrb_fixed_size_column.c \ diff --git a/storage/mroonga/vendor/groonga/lib/output.c b/storage/mroonga/vendor/groonga/lib/output.c index 19bfd73819d..9b745c89a01 100644 --- a/storage/mroonga/vendor/groonga/lib/output.c +++ b/storage/mroonga/vendor/groonga/lib/output.c @@ -1411,6 +1411,7 @@ grn_output_obj(grn_ctx *ctx, grn_obj *outbuf, grn_content_type output_type, break; case GRN_TABLE_HASH_KEY : case GRN_TABLE_PAT_KEY : + case GRN_TABLE_DAT_KEY : case GRN_TABLE_NO_KEY : grn_output_table(ctx, outbuf, output_type, obj, format); break; diff --git a/storage/mroonga/vendor/groonga/lib/pat.c b/storage/mroonga/vendor/groonga/lib/pat.c index 4ca2ffc42e0..5b03e6132e9 100644 --- 
a/storage/mroonga/vendor/groonga/lib/pat.c +++ b/storage/mroonga/vendor/groonga/lib/pat.c @@ -1,5 +1,5 @@ /* -*- c-basic-offset: 2 -*- */ -/* Copyright(C) 2009-2012 Brazil +/* Copyright(C) 2009-2014 Brazil This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public @@ -432,6 +432,7 @@ _grn_pat_create(grn_ctx *ctx, grn_pat *pat, pat->normalizer = NULL; header->normalizer = GRN_ID_NIL; } + GRN_PTR_INIT(&(pat->token_filters), GRN_OBJ_VECTOR, GRN_ID_NIL); pat->io = io; pat->header = header; pat->key_size = key_size; @@ -533,6 +534,7 @@ grn_pat_open(grn_ctx *ctx, const char *path) } else { pat->normalizer = grn_ctx_at(ctx, header->normalizer); } + GRN_PTR_INIT(&(pat->token_filters), GRN_OBJ_VECTOR, GRN_ID_NIL); pat->obj.header.flags = header->flags; PAT_AT(pat, 0, node0); if (!node0) { @@ -552,6 +554,7 @@ grn_pat_close(grn_ctx *ctx, grn_pat *pat) if ((rc = grn_io_close(ctx, pat->io))) { ERR(rc, "grn_io_close failed"); } else { + GRN_OBJ_FIN(ctx, &(pat->token_filters)); if (pat->cache) { grn_pat_cache_disable(ctx, pat); } GRN_FREE(pat); } diff --git a/storage/mroonga/vendor/groonga/lib/pat.h b/storage/mroonga/vendor/groonga/lib/pat.h index 09e1fa87992..d2d10322c7c 100644 --- a/storage/mroonga/vendor/groonga/lib/pat.h +++ b/storage/mroonga/vendor/groonga/lib/pat.h @@ -1,5 +1,5 @@ /* -*- c-basic-offset: 2 -*- */ -/* Copyright(C) 2009 Brazil +/* Copyright(C) 2009-2014 Brazil This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public @@ -39,6 +39,7 @@ struct _grn_pat { uint32_t value_size; grn_obj *tokenizer; grn_obj *normalizer; + grn_obj token_filters; grn_id *cache; uint32_t cache_size; }; diff --git a/storage/mroonga/vendor/groonga/lib/plugin.c b/storage/mroonga/vendor/groonga/lib/plugin.c index 26e4a3d5165..3483cc9a5b0 100644 --- a/storage/mroonga/vendor/groonga/lib/plugin.c +++ b/storage/mroonga/vendor/groonga/lib/plugin.c @@ -375,7 +375,6 @@ 
grn_plugin_get_system_plugins_dir(void) if (!win32_plugins_dir) { const char *base_dir; const char *relative_path = GRN_RELATIVE_PLUGINS_DIR; - char *path; size_t base_dir_length; base_dir = grn_win32_base_dir(); diff --git a/storage/mroonga/vendor/groonga/lib/proc.c b/storage/mroonga/vendor/groonga/lib/proc.c index e1473472f55..37b7ae67d35 100644 --- a/storage/mroonga/vendor/groonga/lib/proc.c +++ b/storage/mroonga/vendor/groonga/lib/proc.c @@ -1102,6 +1102,12 @@ grn_parse_column_create_flags(grn_ctx *ctx, const char *nptr, const char *end) } else if (!memcmp(nptr, "COLUMN_INDEX", 12)) { flags |= GRN_OBJ_COLUMN_INDEX; nptr += 12; + } else if (!memcmp(nptr, "COMPRESS_ZLIB", 13)) { + flags |= GRN_OBJ_COMPRESS_ZLIB; + nptr += 13; + } else if (!memcmp(nptr, "COMPRESS_LZO", 12)) { + flags |= GRN_OBJ_COMPRESS_LZO; + nptr += 12; } else if (!memcmp(nptr, "WITH_SECTION", 12)) { flags |= GRN_OBJ_WITH_SECTION; nptr += 12; @@ -1194,6 +1200,116 @@ grn_column_create_flags_to_text(grn_ctx *ctx, grn_obj *buf, grn_obj_flags flags) } } +static grn_bool +proc_table_create_set_token_filters_put(grn_ctx *ctx, + grn_obj *token_filters, + const char *token_filter_name, + int token_filter_name_length) +{ + grn_obj *token_filter; + + token_filter = grn_ctx_get(ctx, + token_filter_name, + token_filter_name_length); + if (token_filter) { + GRN_PTR_PUT(ctx, token_filters, token_filter); + return GRN_TRUE; + } else { + ERR(GRN_INVALID_ARGUMENT, + "[table][create][token-filter] nonexistent token filter: <%.*s>", + token_filter_name_length, token_filter_name); + return GRN_FALSE; + } +} + +static grn_bool +proc_table_create_set_token_filters_fill(grn_ctx *ctx, + grn_obj *token_filters, + grn_obj *token_filter_names) +{ + const char *start, *current, *end; + const char *name_start, *name_end; + const char *last_name_end; + + start = GRN_TEXT_VALUE(token_filter_names); + end = start + GRN_TEXT_LEN(token_filter_names); + current = start; + name_start = NULL; + name_end = NULL; + last_name_end = 
start; + while (current < end) { + switch (current[0]) { + case ' ' : + if (name_start && !name_end) { + name_end = current; + } + break; + case ',' : + if (!name_start) { + goto break_loop; + } + if (!name_end) { + name_end = current; + } + proc_table_create_set_token_filters_put(ctx, + token_filters, + name_start, + name_end - name_start); + last_name_end = name_end + 1; + name_start = NULL; + name_end = NULL; + break; + default : + if (!name_start) { + name_start = current; + } + break; + } + current++; + } + +break_loop: + if (!name_start) { + ERR(GRN_INVALID_ARGUMENT, + "[table][create][token-filter] empty token filter name: " + "<%.*s|%.*s|%.*s>", + (int)(last_name_end - start), start, + (int)(current - last_name_end), last_name_end, + (int)(end - current), current); + return GRN_FALSE; + } + + if (!name_end) { + name_end = current; + } + proc_table_create_set_token_filters_put(ctx, + token_filters, + name_start, + name_end - name_start); + + return GRN_TRUE; +} + +static void +proc_table_create_set_token_filters(grn_ctx *ctx, + grn_obj *table, + grn_obj *token_filter_names) +{ + grn_obj token_filters; + + if (GRN_TEXT_LEN(token_filter_names) == 0) { + return; + } + + GRN_PTR_INIT(&token_filters, GRN_OBJ_VECTOR, 0); + if (proc_table_create_set_token_filters_fill(ctx, + &token_filters, + token_filter_names)) { + grn_obj_set_info(ctx, table, GRN_INFO_TOKEN_FILTERS, &token_filters); + } + grn_obj_unlink(ctx, &token_filters); +} + static grn_obj * proc_table_create(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data) { @@ -1251,6 +1367,7 @@ proc_table_create(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_d GRN_TEXT_VALUE(normalizer_name), GRN_TEXT_LEN(normalizer_name))); } + proc_table_create_set_token_filters(ctx, table, VAR(6)); grn_obj_unlink(ctx, table); } } else { @@ -1316,9 +1433,104 @@ exit: return NULL; } +static grn_rc +proc_column_create_resolve_source_name(grn_ctx *ctx, + grn_obj *table, + const char *source_name, + int 
source_name_length, + grn_obj *source_ids) +{ + grn_obj *column; + + column = grn_obj_column(ctx, table, source_name, source_name_length); + if (!column) { + ERR(GRN_INVALID_ARGUMENT, + "[column][create] nonexistent source: <%.*s>", + source_name_length, source_name); + return ctx->rc; + } + + if (column->header.type == GRN_ACCESSOR) { + if (strncmp(source_name, "_key", source_name_length) == 0) { + grn_id source_id = grn_obj_id(ctx, table); + GRN_UINT32_PUT(ctx, source_ids, source_id); + } else { + ERR(GRN_INVALID_ARGUMENT, + "[column][create] pseudo column except <_key> is invalid: <%.*s>", + source_name_length, source_name); + } + } else { + grn_id source_id = grn_obj_id(ctx, column); + GRN_UINT32_PUT(ctx, source_ids, source_id); + } + grn_obj_unlink(ctx, column); + + return ctx->rc; +} + +static grn_rc +proc_column_create_resolve_source_names(grn_ctx *ctx, + grn_obj *table, + grn_obj *source_names, + grn_obj *source_ids) +{ + int i, names_length; + int start, source_name_length; + const char *names; + + names = GRN_TEXT_VALUE(source_names); + start = 0; + source_name_length = 0; + names_length = GRN_TEXT_LEN(source_names); + for (i = 0; i < names_length; i++) { + switch (names[i]) { + case ' ' : + if (source_name_length == 0) { + start++; + } + break; + case ',' : + { + grn_rc rc; + const char *source_name = names + start; + rc = proc_column_create_resolve_source_name(ctx, + table, + source_name, + source_name_length, + source_ids); + if (rc) { + return rc; + } + start = i + 1; + source_name_length = 0; + } + break; + default : + source_name_length++; + break; + } + } + + if (source_name_length > 0) { + grn_rc rc; + const char *source_name = names + start; + rc = proc_column_create_resolve_source_name(ctx, + table, + source_name, + source_name_length, + source_ids); + if (rc) { + return rc; + } + } + + return GRN_SUCCESS; +} + static grn_obj * proc_column_create(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data) { + grn_bool succeeded = 
GRN_TRUE; grn_obj *column, *table = NULL, *type = NULL; const char *rest; grn_obj_flags flags = grn_atoi(GRN_TEXT_VALUE(VAR(2)), @@ -1326,13 +1538,17 @@ proc_column_create(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_ if (GRN_TEXT_VALUE(VAR(2)) == rest) { flags = grn_parse_column_create_flags(ctx, GRN_TEXT_VALUE(VAR(2)), GRN_BULK_CURR(VAR(2))); - if (ctx->rc) { goto exit; } + if (ctx->rc) { + succeeded = GRN_FALSE; + goto exit; + } } table = grn_ctx_get(ctx, GRN_TEXT_VALUE(VAR(0)), GRN_TEXT_LEN(VAR(0))); if (!table) { ERR(GRN_INVALID_ARGUMENT, "[column][create] table doesn't exist: <%.*s>", (int)GRN_TEXT_LEN(VAR(0)), GRN_TEXT_VALUE(VAR(0))); + succeeded = GRN_FALSE; goto exit; } type = grn_ctx_get(ctx, GRN_TEXT_VALUE(VAR(3)), @@ -1341,12 +1557,14 @@ proc_column_create(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_ ERR(GRN_INVALID_ARGUMENT, "[column][create] type doesn't exist: <%.*s>", (int)GRN_TEXT_LEN(VAR(3)), GRN_TEXT_VALUE(VAR(3))) ; + succeeded = GRN_FALSE; goto exit; } if (GRN_TEXT_LEN(VAR(1))) { flags |= GRN_OBJ_PERSISTENT; } else { ERR(GRN_INVALID_ARGUMENT, "[column][create] name is missing"); + succeeded = GRN_FALSE; goto exit; } column = grn_column_create(ctx, table, @@ -1355,36 +1573,30 @@ proc_column_create(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_ NULL, flags, type); if (column) { if (GRN_TEXT_LEN(VAR(4))) { - grn_obj sources, source_ids, **p, **pe; - GRN_PTR_INIT(&sources, GRN_OBJ_VECTOR, GRN_ID_NIL); + grn_rc rc; + grn_obj source_ids; GRN_UINT32_INIT(&source_ids, GRN_OBJ_VECTOR); - grn_obj_columns(ctx, type, - GRN_TEXT_VALUE(VAR(4)), - GRN_TEXT_LEN(VAR(4)), - &sources); - p = (grn_obj **)GRN_BULK_HEAD(&sources); - pe = (grn_obj **)GRN_BULK_CURR(&sources); - for (; p < pe; p++) { - grn_id source_id = grn_obj_id(ctx, *p); - if ((*p)->header.type == GRN_ACCESSOR) { - /* todo : if "_key" assigned */ - source_id = grn_obj_id(ctx, type); - } - if (source_id) { - GRN_UINT32_PUT(ctx, &source_ids, source_id); 
- } - grn_obj_unlink(ctx, *p); - } - if (GRN_BULK_VSIZE(&source_ids)) { + rc = proc_column_create_resolve_source_names(ctx, + type, + VAR(4), + &source_ids); + if (!rc && GRN_BULK_VSIZE(&source_ids)) { grn_obj_set_info(ctx, column, GRN_INFO_SOURCE, &source_ids); + rc = ctx->rc; } GRN_OBJ_FIN(ctx, &source_ids); - GRN_OBJ_FIN(ctx, &sources); + if (rc) { + grn_obj_remove(ctx, column); + succeeded = GRN_FALSE; + goto exit; + } } grn_obj_unlink(ctx, column); + } else { + succeeded = GRN_FALSE; } exit: - GRN_OUTPUT_BOOL(!ctx->rc); + GRN_OUTPUT_BOOL(succeeded); if (table) { grn_obj_unlink(ctx, table); } if (type) { grn_obj_unlink(ctx, type); } return NULL; @@ -2544,6 +2756,26 @@ dump_table(grn_ctx *ctx, grn_obj *outbuf, grn_obj *table, GRN_TEXT_PUTS(ctx, outbuf, " --normalizer "); dump_obj_name(ctx, outbuf, normalizer); } + if (table->header.type != GRN_TABLE_NO_KEY) { + grn_obj token_filters; + int n_token_filters; + + GRN_PTR_INIT(&token_filters, GRN_OBJ_VECTOR, GRN_ID_NIL); + grn_obj_get_info(ctx, table, GRN_INFO_TOKEN_FILTERS, &token_filters); + n_token_filters = GRN_BULK_VSIZE(&token_filters) / sizeof(grn_obj *); + if (n_token_filters > 0) { + int i; + GRN_TEXT_PUTS(ctx, outbuf, " --token_filters "); + for (i = 0; i < n_token_filters; i++) { + grn_obj *token_filter = GRN_PTR_VALUE_AT(&token_filters, i); + if (i > 0) { + GRN_TEXT_PUTC(ctx, outbuf, ','); + } + dump_obj_name(ctx, outbuf, token_filter); + } + } + GRN_OBJ_FIN(ctx, &token_filters); + } GRN_TEXT_PUTC(ctx, outbuf, '\n'); @@ -3274,17 +3506,18 @@ static void tokenize(grn_ctx *ctx, grn_hash *lexicon, grn_obj *string, grn_token_mode mode, unsigned int flags, grn_obj *tokens) { - grn_token *token; + grn_token_cursor *token_cursor; - token = grn_token_open(ctx, (grn_obj *)lexicon, - GRN_TEXT_VALUE(string), GRN_TEXT_LEN(string), - mode, flags); - if (!token) { + token_cursor = + grn_token_cursor_open(ctx, (grn_obj *)lexicon, + GRN_TEXT_VALUE(string), GRN_TEXT_LEN(string), + mode, flags); + if (!token_cursor) { 
return; } - while (token->status == GRN_TOKEN_DOING) { - grn_id token_id = grn_token_next(ctx, token); + while (token_cursor->status == GRN_TOKEN_DOING) { + grn_id token_id = grn_token_cursor_next(ctx, token_cursor); tokenize_token *current_token; if (token_id == GRN_ID_NIL) { continue; @@ -3292,9 +3525,9 @@ tokenize(grn_ctx *ctx, grn_hash *lexicon, grn_obj *string, grn_token_mode mode, grn_bulk_space(ctx, tokens, sizeof(tokenize_token)); current_token = ((tokenize_token *)(GRN_BULK_CURR(tokens))) - 1; current_token->id = token_id; - current_token->position = token->pos; + current_token->position = token_cursor->pos; } - grn_token_close(ctx, token); + grn_token_cursor_close(ctx, token_cursor); } static grn_obj * @@ -3348,15 +3581,16 @@ proc_tokenize(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data) GRN_OBJ_FIN(ctx, &tokens); } else if (MODE_NAME_EQUAL("GET")) { { - grn_token *token; - token = grn_token_open(ctx, (grn_obj *)lexicon, - GRN_TEXT_VALUE(string), GRN_TEXT_LEN(string), - GRN_TOKEN_ADD, flags); - if (token) { - while (token->status == GRN_TOKEN_DOING) { - grn_token_next(ctx, token); + grn_token_cursor *token_cursor; + token_cursor = + grn_token_cursor_open(ctx, (grn_obj *)lexicon, + GRN_TEXT_VALUE(string), GRN_TEXT_LEN(string), + GRN_TOKEN_ADD, flags); + if (token_cursor) { + while (token_cursor->status == GRN_TOKEN_DOING) { + grn_token_cursor_next(ctx, token_cursor); } - grn_token_close(ctx, token); + grn_token_cursor_close(ctx, token_cursor); } } @@ -5076,7 +5310,8 @@ grn_db_init_builtin_query(grn_ctx *ctx) DEF_VAR(vars[3], "value_type"); DEF_VAR(vars[4], "default_tokenizer"); DEF_VAR(vars[5], "normalizer"); - DEF_COMMAND("table_create", proc_table_create, 6, vars); + DEF_VAR(vars[6], "token_filters"); + DEF_COMMAND("table_create", proc_table_create, 7, vars); DEF_VAR(vars[0], "name"); DEF_COMMAND("table_remove", proc_table_remove, 1, vars); diff --git a/storage/mroonga/vendor/groonga/lib/snip.c 
b/storage/mroonga/vendor/groonga/lib/snip.c index f5ed677b091..78c6ef54387 100644 --- a/storage/mroonga/vendor/groonga/lib/snip.c +++ b/storage/mroonga/vendor/groonga/lib/snip.c @@ -434,7 +434,7 @@ grn_snip_find_firstbyte(const char *string, grn_encoding encoding, size_t offset offset += doffset; break; case GRN_ENC_UTF8: - while (string[offset] <= (char)0xc0) + while ((signed char)string[offset] <= (signed char)0xc0) offset += doffset; break; default: diff --git a/storage/mroonga/vendor/groonga/lib/sources.am b/storage/mroonga/vendor/groonga/lib/sources.am index 9e726b1607b..8b371081fe6 100644 --- a/storage/mroonga/vendor/groonga/lib/sources.am +++ b/storage/mroonga/vendor/groonga/lib/sources.am @@ -47,5 +47,6 @@ libgroonga_la_SOURCES = \ token.c \ token.h \ tokenizer.c \ + token_filter.c \ util.c \ util.h diff --git a/storage/mroonga/vendor/groonga/lib/store.c b/storage/mroonga/vendor/groonga/lib/store.c index 041c01ec3f8..3649611a560 100644 --- a/storage/mroonga/vendor/groonga/lib/store.c +++ b/storage/mroonga/vendor/groonga/lib/store.c @@ -524,6 +524,7 @@ grn_ja_ref_raw(grn_ctx *ctx, grn_ja *ja, grn_id id, grn_io_win *iw, uint32_t *va iw->size = 0; iw->addr = NULL; iw->pseg = pseg; + iw->value = NULL; if (pseg != JA_ESEG_VOID) { grn_ja_einfo *einfo = NULL; GRN_IO_SEG_REF(ja->io, pseg, einfo); @@ -556,9 +557,14 @@ grn_ja_ref_raw(grn_ctx *ctx, grn_ja *ja, grn_id id, grn_io_win *iw, uint32_t *va grn_rc grn_ja_unref(grn_ctx *ctx, grn_io_win *iw) { - if (!iw->addr) { return GRN_INVALID_ARGUMENT; } - GRN_IO_SEG_UNREF(iw->io, iw->pseg); - if (!iw->tiny_p) { grn_io_win_unmap2(iw); } + if (iw->value) { + GRN_FREE(iw->value); + iw->value = NULL; + } else { + if (!iw->addr) { return GRN_INVALID_ARGUMENT; } + GRN_IO_SEG_UNREF(iw->io, iw->pseg); + if (!iw->tiny_p) { grn_io_win_unmap2(iw); } + } return GRN_SUCCESS; } @@ -858,6 +864,7 @@ grn_ja_alloc(grn_ctx *ctx, grn_ja *ja, grn_id id, vp->seg = 0; vp->pos = 0; } + iw->value = NULL; grn_io_unlock(ja->io); return GRN_SUCCESS; 
} @@ -1176,12 +1183,11 @@ grn_ja_element_info(grn_ctx *ctx, grn_ja *ja, grn_id id, static void * grn_ja_ref_zlib(grn_ctx *ctx, grn_ja *ja, grn_id id, grn_io_win *iw, uint32_t *value_len) { - /* TODO: This function leaks a memory. The return value - * must be freed. */ z_stream zstream; - void *value, *zvalue; + void *zvalue; uint32_t zvalue_len; if (!(zvalue = grn_ja_ref_raw(ctx, ja, id, iw, &zvalue_len))) { + iw->value = NULL; *value_len = 0; return NULL; } @@ -1190,29 +1196,33 @@ grn_ja_ref_zlib(grn_ctx *ctx, grn_ja *ja, grn_id id, grn_io_win *iw, uint32_t *v zstream.zalloc = Z_NULL; zstream.zfree = Z_NULL; if (inflateInit2(&zstream, 15 /* windowBits */) != Z_OK) { + iw->value = NULL; *value_len = 0; return NULL; } - if (!(value = GRN_MALLOC(*((uint64_t *)zvalue)))) { + if (!(iw->value = GRN_MALLOC(*((uint64_t *)zvalue)))) { inflateEnd(&zstream); + iw->value = NULL; *value_len = 0; return NULL; } - zstream.next_out = (Bytef *)value; + zstream.next_out = (Bytef *)iw->value; zstream.avail_out = *(uint64_t *)zvalue; if (inflate(&zstream, Z_FINISH) != Z_STREAM_END) { inflateEnd(&zstream); - GRN_FREE(value); + GRN_FREE(iw->value); + iw->value = NULL; *value_len = 0; return NULL; } *value_len = zstream.total_out; if (inflateEnd(&zstream) != Z_OK) { - GRN_FREE(value); + GRN_FREE(iw->value); + iw->value = NULL; *value_len = 0; return NULL; } - return value; + return iw->value; } #endif /* GRN_WITH_ZLIB */ @@ -1222,35 +1232,36 @@ grn_ja_ref_zlib(grn_ctx *ctx, grn_ja *ja, grn_id id, grn_io_win *iw, uint32_t *v static void * grn_ja_ref_lzo(grn_ctx *ctx, grn_ja *ja, grn_id id, grn_io_win *iw, uint32_t *value_len) { - /* TODO: This function leaks a memory. The return value - * must be freed. 
*/ - void *value, *lvalue; + void *lvalue; uint32_t lvalue_len; lzo_uint lout_len; if (!(lvalue = grn_ja_ref_raw(ctx, ja, id, iw, &lvalue_len))) { + iw->value = NULL; *value_len = 0; return NULL; } - if (!(value = GRN_MALLOC(*((uint64_t *)lvalue)))) { + if (!(iw->value = GRN_MALLOC(*((uint64_t *)lvalue)))) { + iw->value = NULL; *value_len = 0; return NULL; } lout_len = *((uint64_t *)lvalue); switch (lzo1x_decompress((lzo_bytep)(((uint64_t *)lvalue) + 1), lvalue_len, - (lzo_bytep)(value), + (lzo_bytep)(iw->value), &lout_len, NULL)) { case LZO_E_OK : case LZO_E_INPUT_NOT_CONSUMED : break; default : - GRN_FREE(value); + GRN_FREE(iw->value); + iw->value = NULL; *value_len = 0; return NULL; } *value_len = lout_len; - return value; + return iw->value; } #endif /* GRN_WITH_LZO */ diff --git a/storage/mroonga/vendor/groonga/lib/str.c b/storage/mroonga/vendor/groonga/lib/str.c index 255f8020a7b..29e532a8021 100644 --- a/storage/mroonga/vendor/groonga/lib/str.c +++ b/storage/mroonga/vendor/groonga/lib/str.c @@ -2418,7 +2418,7 @@ grn_text_urlenc(grn_ctx *ctx, grn_obj *buf, const char *s, unsigned int len) { const char *e, c = '%'; for (e = s + len; s < e; s++) { - if (*s < 0 || urlenc_tbl[(int)*s]) { + if ((signed char)*s < 0 || urlenc_tbl[(int)*s]) { if (!grn_bulk_write(ctx, buf, &c, 1)) { if (grn_text_itoh(ctx, buf, *s, 2)) { GRN_BULK_INCR_LEN(buf, -1); diff --git a/storage/mroonga/vendor/groonga/lib/token.c b/storage/mroonga/vendor/groonga/lib/token.c index edce2edcb82..00b3de2ebdd 100644 --- a/storage/mroonga/vendor/groonga/lib/token.c +++ b/storage/mroonga/vendor/groonga/lib/token.c @@ -1,6 +1,6 @@ /* -*- c-basic-offset: 2 -*- */ /* - Copyright(C) 2009-2012 Brazil + Copyright(C) 2009-2014 Brazil This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public @@ -495,32 +495,59 @@ grn_token_fin(void) return GRN_SUCCESS; } -grn_token * -grn_token_open(grn_ctx *ctx, grn_obj *table, const char *str, size_t str_len, - 
grn_token_mode mode, unsigned int flags) +static void +grn_token_cursor_open_initialize_token_filters(grn_ctx *ctx, + grn_token_cursor *token_cursor) { - grn_token *token; + grn_obj *token_filters = token_cursor->token_filters; + unsigned int i, n_token_filters; + + if (token_filters) { + n_token_filters = GRN_BULK_VSIZE(token_filters) / sizeof(grn_obj *); + } else { + n_token_filters = 0; + } + + for (i = 0; i < n_token_filters; i++) { + grn_obj *token_filter_object = GRN_PTR_VALUE_AT(token_filters, i); + grn_proc *token_filter = (grn_proc *)token_filter_object; + + token_filter->user_data = + token_filter->callbacks.token_filter.init(ctx, + token_cursor->table, + token_cursor->mode); + } +} + +grn_token_cursor * +grn_token_cursor_open(grn_ctx *ctx, grn_obj *table, + const char *str, size_t str_len, + grn_token_mode mode, unsigned int flags) +{ + grn_token_cursor *token_cursor; grn_encoding encoding; grn_obj *tokenizer; grn_obj *normalizer; + grn_obj *token_filters; grn_obj_flags table_flags; if (grn_table_get_info(ctx, table, &table_flags, &encoding, &tokenizer, - &normalizer)) { + &normalizer, &token_filters)) { return NULL; } - if (!(token = GRN_MALLOC(sizeof(grn_token)))) { return NULL; } - token->table = table; - token->mode = mode; - token->encoding = encoding; - token->tokenizer = tokenizer; - token->orig = (const unsigned char *)str; - token->orig_blen = str_len; - token->curr = NULL; - token->nstr = NULL; - token->curr_size = 0; - token->pos = -1; - token->status = GRN_TOKEN_DOING; - token->force_prefix = 0; + if (!(token_cursor = GRN_MALLOC(sizeof(grn_token_cursor)))) { return NULL; } + token_cursor->table = table; + token_cursor->mode = mode; + token_cursor->encoding = encoding; + token_cursor->tokenizer = tokenizer; + token_cursor->token_filters = token_filters; + token_cursor->orig = (const unsigned char *)str; + token_cursor->orig_blen = str_len; + token_cursor->curr = NULL; + token_cursor->nstr = NULL; + token_cursor->curr_size = 0; + 
token_cursor->pos = -1; + token_cursor->status = GRN_TOKEN_DOING; + token_cursor->force_prefix = 0; if (tokenizer) { grn_obj str_, flags_, mode_; GRN_TEXT_INIT(&str_, GRN_OBJ_DO_SHALLOW_COPY); @@ -529,104 +556,175 @@ grn_token_open(grn_ctx *ctx, grn_obj *table, const char *str, size_t str_len, GRN_UINT32_SET(ctx, &flags_, flags); GRN_UINT32_INIT(&mode_, 0); GRN_UINT32_SET(ctx, &mode_, mode); - token->pctx.caller = NULL; - token->pctx.user_data.ptr = NULL; - token->pctx.proc = (grn_proc *)tokenizer; - token->pctx.hooks = NULL; - token->pctx.currh = NULL; - token->pctx.phase = PROC_INIT; + token_cursor->pctx.caller = NULL; + token_cursor->pctx.user_data.ptr = NULL; + token_cursor->pctx.proc = (grn_proc *)tokenizer; + token_cursor->pctx.hooks = NULL; + token_cursor->pctx.currh = NULL; + token_cursor->pctx.phase = PROC_INIT; grn_ctx_push(ctx, &mode_); grn_ctx_push(ctx, &str_); grn_ctx_push(ctx, &flags_); - ((grn_proc *)tokenizer)->funcs[PROC_INIT](ctx, 1, &table, &token->pctx.user_data); + ((grn_proc *)tokenizer)->funcs[PROC_INIT](ctx, 1, &table, &token_cursor->pctx.user_data); grn_obj_close(ctx, &flags_); grn_obj_close(ctx, &str_); grn_obj_close(ctx, &mode_); } else { int nflags = 0; - token->nstr = grn_string_open_(ctx, str, str_len, - normalizer, nflags, token->encoding); - if (token->nstr) { + token_cursor->nstr = grn_string_open_(ctx, str, str_len, + normalizer, + nflags, + token_cursor->encoding); + if (token_cursor->nstr) { const char *normalized; - grn_string_get_normalized(ctx, token->nstr, - &normalized, &(token->curr_size), NULL); - token->curr = (const unsigned char *)normalized; + grn_string_get_normalized(ctx, token_cursor->nstr, + &normalized, &(token_cursor->curr_size), NULL); + token_cursor->curr = (const unsigned char *)normalized; } else { - ERR(GRN_TOKENIZER_ERROR, "grn_string_open failed at grn_token_open"); + ERR(GRN_TOKENIZER_ERROR, + "[token-cursor][open] failed to grn_string_open()"); } } + + grn_token_cursor_open_initialize_token_filters(ctx, 
token_cursor); + if (ctx->rc) { - grn_token_close(ctx, token); - token = NULL; + grn_token_cursor_close(ctx, token_cursor); + token_cursor = NULL; + } + return token_cursor; +} + +static int +grn_token_cursor_next_apply_token_filters(grn_ctx *ctx, + grn_token_cursor *token_cursor, + grn_obj *current_token_data, + grn_obj *status) +{ + grn_obj *token_filters = token_cursor->token_filters; + unsigned int i, n_token_filters; + grn_token current_token; + grn_token next_token; + + if (token_filters) { + n_token_filters = GRN_BULK_VSIZE(token_filters) / sizeof(grn_obj *); + } else { + n_token_filters = 0; + } + + GRN_TEXT_INIT(&(current_token.data), GRN_OBJ_DO_SHALLOW_COPY); + GRN_TEXT_SET(ctx, &(current_token.data), + GRN_TEXT_VALUE(current_token_data), + GRN_TEXT_LEN(current_token_data)); + current_token.status = GRN_INT32_VALUE(status); + GRN_TEXT_INIT(&(next_token.data), GRN_OBJ_DO_SHALLOW_COPY); + GRN_TEXT_SET(ctx, &(next_token.data), + GRN_TEXT_VALUE(&(current_token.data)), + GRN_TEXT_LEN(&(current_token.data))); + next_token.status = current_token.status; + + for (i = 0; i < n_token_filters; i++) { + grn_obj *token_filter_object = GRN_PTR_VALUE_AT(token_filters, i); + grn_proc *token_filter = (grn_proc *)token_filter_object; + +#define SKIP_FLAGS\ + (GRN_TOKENIZER_TOKEN_SKIP |\ + GRN_TOKENIZER_TOKEN_SKIP_WITH_POSITION) + if (current_token.status & SKIP_FLAGS) { + break; + } +#undef SKIP_FLAGS + + token_filter->callbacks.token_filter.filter(ctx, + &current_token, + &next_token, + token_filter->user_data); + GRN_TEXT_SET(ctx, &(current_token.data), + GRN_TEXT_VALUE(&(next_token.data)), + GRN_TEXT_LEN(&(next_token.data))); + current_token.status = next_token.status; } - return token; + + token_cursor->curr = + (const unsigned char *)GRN_TEXT_VALUE(&(current_token.data)); + token_cursor->curr_size = GRN_TEXT_LEN(&(current_token.data)); + + return current_token.status; } grn_id -grn_token_next(grn_ctx *ctx, grn_token *token) +grn_token_cursor_next(grn_ctx *ctx, 
grn_token_cursor *token_cursor) { int status; grn_id tid = GRN_ID_NIL; - grn_obj *table = token->table; - grn_obj *tokenizer = token->tokenizer; - while (token->status != GRN_TOKEN_DONE) { + grn_obj *table = token_cursor->table; + grn_obj *tokenizer = token_cursor->tokenizer; + while (token_cursor->status != GRN_TOKEN_DONE) { if (tokenizer) { grn_obj *curr_, *stat_; - ((grn_proc *)tokenizer)->funcs[PROC_NEXT](ctx, 1, &table, &token->pctx.user_data); + ((grn_proc *)tokenizer)->funcs[PROC_NEXT](ctx, 1, &table, &token_cursor->pctx.user_data); stat_ = grn_ctx_pop(ctx); curr_ = grn_ctx_pop(ctx); - token->curr = (const unsigned char *)GRN_TEXT_VALUE(curr_); - token->curr_size = GRN_TEXT_LEN(curr_); - status = GRN_UINT32_VALUE(stat_); - token->status = ((status & GRN_TOKENIZER_TOKEN_LAST) || - (token->mode == GRN_TOKEN_GET && - (status & GRN_TOKENIZER_TOKEN_REACH_END))) + status = grn_token_cursor_next_apply_token_filters(ctx, token_cursor, + curr_, stat_); + token_cursor->status = + ((status & GRN_TOKENIZER_TOKEN_LAST) || + (token_cursor->mode == GRN_TOKEN_GET && + (status & GRN_TOKENIZER_TOKEN_REACH_END))) ? 
GRN_TOKEN_DONE : GRN_TOKEN_DOING; - token->force_prefix = 0; - if (status & GRN_TOKENIZER_TOKEN_SKIP) { - token->pos++; - continue; - } else if (status & GRN_TOKENIZER_TOKEN_SKIP_WITH_POSITION) { - continue; + token_cursor->force_prefix = 0; +#define SKIP_FLAGS \ + (GRN_TOKENIZER_TOKEN_SKIP | GRN_TOKENIZER_TOKEN_SKIP_WITH_POSITION) + if (status & SKIP_FLAGS) { + if (status & GRN_TOKENIZER_TOKEN_SKIP) { + token_cursor->pos++; + } + if (token_cursor->status == GRN_TOKEN_DONE && tid == GRN_ID_NIL) { + token_cursor->status = GRN_TOKEN_DONE_SKIP; + break; + } else { + continue; + } } - if (token->curr_size == 0) { +#undef SKIP_FLAGS + if (token_cursor->curr_size == 0) { char tokenizer_name[GRN_TABLE_MAX_KEY_SIZE]; int tokenizer_name_length; tokenizer_name_length = - grn_obj_name(ctx, token->tokenizer, + grn_obj_name(ctx, token_cursor->tokenizer, tokenizer_name, GRN_TABLE_MAX_KEY_SIZE); GRN_LOG(ctx, GRN_WARN, "[token_next] ignore an empty token: <%.*s>: <%.*s>", tokenizer_name_length, tokenizer_name, - token->orig_blen, token->orig); + token_cursor->orig_blen, token_cursor->orig); continue; } - if (token->curr_size > GRN_TABLE_MAX_KEY_SIZE) { + if (token_cursor->curr_size > GRN_TABLE_MAX_KEY_SIZE) { GRN_LOG(ctx, GRN_WARN, "[token_next] ignore too long token. 
" "Token must be less than or equal to %d: <%d>(<%.*s>)", GRN_TABLE_MAX_KEY_SIZE, - token->curr_size, - token->curr_size, token->curr); + token_cursor->curr_size, + token_cursor->curr_size, token_cursor->curr); continue; } if (status & GRN_TOKENIZER_TOKEN_UNMATURED) { if (status & GRN_TOKENIZER_TOKEN_OVERLAP) { - if (token->mode == GRN_TOKEN_GET) { token->pos++; continue; } + if (token_cursor->mode == GRN_TOKEN_GET) { token_cursor->pos++; continue; } } else { - if (status & GRN_TOKENIZER_TOKEN_LAST) { token->force_prefix = 1; } + if (status & GRN_TOKENIZER_TOKEN_LAST) { token_cursor->force_prefix = 1; } } } } else { - token->status = GRN_TOKEN_DONE; + token_cursor->status = GRN_TOKEN_DONE; } - if (token->mode == GRN_TOKEN_ADD) { + if (token_cursor->mode == GRN_TOKEN_ADD) { switch (table->header.type) { case GRN_TABLE_PAT_KEY : if (grn_io_lock(ctx, ((grn_pat *)table)->io, grn_lock_timeout)) { tid = GRN_ID_NIL; } else { - tid = grn_pat_add(ctx, (grn_pat *)table, token->curr, token->curr_size, + tid = grn_pat_add(ctx, (grn_pat *)table, token_cursor->curr, token_cursor->curr_size, NULL, NULL); grn_io_unlock(((grn_pat *)table)->io); } @@ -635,7 +733,7 @@ grn_token_next(grn_ctx *ctx, grn_token *token) if (grn_io_lock(ctx, ((grn_dat *)table)->io, grn_lock_timeout)) { tid = GRN_ID_NIL; } else { - tid = grn_dat_add(ctx, (grn_dat *)table, token->curr, token->curr_size, + tid = grn_dat_add(ctx, (grn_dat *)table, token_cursor->curr, token_cursor->curr_size, NULL, NULL); grn_io_unlock(((grn_dat *)table)->io); } @@ -644,14 +742,14 @@ grn_token_next(grn_ctx *ctx, grn_token *token) if (grn_io_lock(ctx, ((grn_hash *)table)->io, grn_lock_timeout)) { tid = GRN_ID_NIL; } else { - tid = grn_hash_add(ctx, (grn_hash *)table, token->curr, token->curr_size, + tid = grn_hash_add(ctx, (grn_hash *)table, token_cursor->curr, token_cursor->curr_size, NULL, NULL); grn_io_unlock(((grn_hash *)table)->io); } break; case GRN_TABLE_NO_KEY : - if (token->curr_size == sizeof(grn_id)) { - tid = 
*((grn_id *)token->curr); + if (token_cursor->curr_size == sizeof(grn_id)) { + tid = *((grn_id *)token_cursor->curr); } else { tid = GRN_ID_NIL; } @@ -660,44 +758,65 @@ grn_token_next(grn_ctx *ctx, grn_token *token) } else { switch (table->header.type) { case GRN_TABLE_PAT_KEY : - tid = grn_pat_get(ctx, (grn_pat *)table, token->curr, token->curr_size, NULL); + tid = grn_pat_get(ctx, (grn_pat *)table, token_cursor->curr, token_cursor->curr_size, NULL); break; case GRN_TABLE_DAT_KEY : - tid = grn_dat_get(ctx, (grn_dat *)table, token->curr, token->curr_size, NULL); + tid = grn_dat_get(ctx, (grn_dat *)table, token_cursor->curr, token_cursor->curr_size, NULL); break; case GRN_TABLE_HASH_KEY : - tid = grn_hash_get(ctx, (grn_hash *)table, token->curr, token->curr_size, NULL); + tid = grn_hash_get(ctx, (grn_hash *)table, token_cursor->curr, token_cursor->curr_size, NULL); break; case GRN_TABLE_NO_KEY : - if (token->curr_size == sizeof(grn_id)) { - tid = *((grn_id *)token->curr); + if (token_cursor->curr_size == sizeof(grn_id)) { + tid = *((grn_id *)token_cursor->curr); } else { tid = GRN_ID_NIL; } break; } } - if (tid == GRN_ID_NIL && token->status != GRN_TOKEN_DONE) { - token->status = GRN_TOKEN_NOT_FOUND; + if (tid == GRN_ID_NIL && token_cursor->status != GRN_TOKEN_DONE) { + token_cursor->status = GRN_TOKEN_NOT_FOUND; } - token->pos++; + token_cursor->pos++; break; } return tid; } +static void +grn_token_cursor_close_token_filters(grn_ctx *ctx, + grn_token_cursor *token_cursor) +{ + grn_obj *token_filters = token_cursor->token_filters; + unsigned int i, n_token_filters; + + if (token_filters) { + n_token_filters = GRN_BULK_VSIZE(token_filters) / sizeof(grn_obj *); + } else { + n_token_filters = 0; + } + for (i = 0; i < n_token_filters; i++) { + grn_obj *token_filter_object = GRN_PTR_VALUE_AT(token_filters, i); + grn_proc *token_filter = (grn_proc *)token_filter_object; + + token_filter->callbacks.token_filter.fin(ctx, token_filter->user_data); + } +} + grn_rc 
-grn_token_close(grn_ctx *ctx, grn_token *token) +grn_token_cursor_close(grn_ctx *ctx, grn_token_cursor *token_cursor) { - if (token) { - if (token->tokenizer) { - ((grn_proc *)token->tokenizer)->funcs[PROC_FIN](ctx, 1, &token->table, - &token->pctx.user_data); + if (token_cursor) { + if (token_cursor->tokenizer) { + ((grn_proc *)token_cursor->tokenizer)->funcs[PROC_FIN](ctx, 1, &token_cursor->table, + &token_cursor->pctx.user_data); } - if (token->nstr) { - grn_obj_close(ctx, token->nstr); + grn_token_cursor_close_token_filters(ctx, token_cursor); + if (token_cursor->nstr) { + grn_obj_close(ctx, token_cursor->nstr); } - GRN_FREE(token); + GRN_FREE(token_cursor); return GRN_SUCCESS; } else { return GRN_INVALID_ARGUMENT; diff --git a/storage/mroonga/vendor/groonga/lib/token.h b/storage/mroonga/vendor/groonga/lib/token.h index f69e30d3aaf..868930758ba 100644 --- a/storage/mroonga/vendor/groonga/lib/token.h +++ b/storage/mroonga/vendor/groonga/lib/token.h @@ -1,5 +1,5 @@ /* -*- c-basic-offset: 2 -*- */ -/* Copyright(C) 2009 Brazil +/* Copyright(C) 2009-2014 Brazil This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public @@ -33,22 +33,24 @@ #include "str.h" #endif /* GRN_STR_H */ +#include <groonga/tokenizer.h> + #ifdef __cplusplus extern "C" { #endif -typedef enum { - GRN_TOKEN_GET = 0, - GRN_TOKEN_ADD, - GRN_TOKEN_DEL -} grn_token_mode; - typedef enum { GRN_TOKEN_DOING = 0, GRN_TOKEN_DONE, + GRN_TOKEN_DONE_SKIP, GRN_TOKEN_NOT_FOUND } grn_token_status; +struct _grn_token { + grn_obj data; + grn_tokenizer_status status; +}; + typedef struct { grn_obj *table; const unsigned char *orig; @@ -63,9 +65,10 @@ typedef struct { grn_encoding encoding; grn_obj *tokenizer; grn_proc_ctx pctx; + grn_obj *token_filters; uint32_t variant; grn_obj *nstr; -} grn_token; +} grn_token_cursor; extern grn_obj *grn_token_uvector; @@ -74,12 +77,13 @@ grn_rc grn_token_fin(void); #define GRN_TOKEN_ENABLE_TOKENIZED_DELIMITER (0x01L<<0) -GRN_API grn_token 
*grn_token_open(grn_ctx *ctx, grn_obj *table, const char *str, - size_t str_len, grn_token_mode mode, - unsigned int flags); +GRN_API grn_token_cursor *grn_token_cursor_open(grn_ctx *ctx, grn_obj *table, + const char *str, size_t str_len, + grn_token_mode mode, + unsigned int flags); -GRN_API grn_id grn_token_next(grn_ctx *ctx, grn_token *ng); -GRN_API grn_rc grn_token_close(grn_ctx *ctx, grn_token *ng); +GRN_API grn_id grn_token_cursor_next(grn_ctx *ctx, grn_token_cursor *token_cursor); +GRN_API grn_rc grn_token_cursor_close(grn_ctx *ctx, grn_token_cursor *token_cursor); grn_rc grn_db_init_mecab_tokenizer(grn_ctx *ctx); grn_rc grn_db_init_builtin_tokenizers(grn_ctx *ctx); diff --git a/storage/mroonga/vendor/groonga/lib/token_filter.c b/storage/mroonga/vendor/groonga/lib/token_filter.c new file mode 100644 index 00000000000..910bb9a5715 --- /dev/null +++ b/storage/mroonga/vendor/groonga/lib/token_filter.c @@ -0,0 +1,59 @@ +/* -*- c-basic-offset: 2 -*- */ +/* + Copyright(C) 2014 Brazil + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License version 2.1 as published by the Free Software Foundation. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +*/ + +#include <string.h> + +#include "groonga_in.h" +#include "db.h" +#include <groonga/token_filter.h> + +grn_rc +grn_token_filter_register(grn_ctx *ctx, + const char *plugin_name_ptr, + int plugin_name_length, + grn_token_filter_init_func *init, + grn_token_filter_filter_func *filter, + grn_token_filter_fin_func *fin) +{ + if (plugin_name_length == -1) { + plugin_name_length = strlen(plugin_name_ptr); + } + + { + grn_obj *token_filter_object = grn_proc_create(ctx, + plugin_name_ptr, + plugin_name_length, + GRN_PROC_TOKENIZER, + NULL, NULL, NULL, 0, NULL); + if (token_filter_object == NULL) { + GRN_PLUGIN_ERROR(ctx, GRN_TOKEN_FILTER_ERROR, + "[token-filter][%.*s] failed to grn_proc_create()", + plugin_name_length, plugin_name_ptr); + return ctx->rc; + } + + { + grn_proc *token_filter = (grn_proc *)token_filter_object; + token_filter->callbacks.token_filter.init = init; + token_filter->callbacks.token_filter.filter = filter; + token_filter->callbacks.token_filter.fin = fin; + } + } + + return GRN_SUCCESS; +} diff --git a/storage/mroonga/vendor/groonga/lib/tokenizer.c b/storage/mroonga/vendor/groonga/lib/tokenizer.c index 37d22789663..ac4628ba5de 100644 --- a/storage/mroonga/vendor/groonga/lib/tokenizer.c +++ b/storage/mroonga/vendor/groonga/lib/tokenizer.c @@ -1,6 +1,6 @@ /* -*- c-basic-offset: 2 -*- */ /* - Copyright(C) 2012 Brazil + Copyright(C) 2012-2014 Brazil This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public @@ -148,7 +148,7 @@ grn_tokenizer_query_open(grn_ctx *ctx, int num_args, grn_obj **args, return NULL; } grn_table_get_info(ctx, table, &table_flags, &table_encoding, NULL, - &normalizer); + &normalizer, NULL); { grn_obj *normalized_query; if (table_flags & GRN_OBJ_KEY_NORMALIZE) { @@ -318,3 +318,59 @@ 
grn_tokenizer_register(grn_ctx *ctx, const char *plugin_name_ptr, } return GRN_SUCCESS; } + +grn_obj * +grn_token_get_data(grn_ctx *ctx, grn_token *token) +{ + GRN_API_ENTER; + if (!token) { + ERR(GRN_INVALID_ARGUMENT, "token must not be NULL"); + GRN_API_RETURN(NULL); + } + GRN_API_RETURN(&(token->data)); +} + +grn_rc +grn_token_set_data(grn_ctx *ctx, + grn_token *token, + const char *str_ptr, + int str_length) +{ + GRN_API_ENTER; + if (!token) { + ERR(GRN_INVALID_ARGUMENT, "token must not be NULL"); + goto exit; + } + if (str_length == -1) { + str_length = strlen(str_ptr); + } + GRN_TEXT_SET(ctx, &(token->data), str_ptr, str_length); +exit: + GRN_API_RETURN(ctx->rc); +} + +grn_tokenizer_status +grn_token_get_status(grn_ctx *ctx, grn_token *token) +{ + GRN_API_ENTER; + if (!token) { + ERR(GRN_INVALID_ARGUMENT, "token must not be NULL"); + GRN_API_RETURN(GRN_TOKENIZER_TOKEN_CONTINUE); + } + GRN_API_RETURN(token->status); +} + +grn_rc +grn_token_set_status(grn_ctx *ctx, + grn_token *token, + grn_tokenizer_status status) +{ + GRN_API_ENTER; + if (!token) { + ERR(GRN_INVALID_ARGUMENT, "token must not be NULL"); + goto exit; + } + token->status = status; +exit: + GRN_API_RETURN(ctx->rc); +} diff --git a/storage/mroonga/vendor/groonga/lib/util.c b/storage/mroonga/vendor/groonga/lib/util.c index 62794abb856..67f0ff32125 100644 --- a/storage/mroonga/vendor/groonga/lib/util.c +++ b/storage/mroonga/vendor/groonga/lib/util.c @@ -1,5 +1,5 @@ /* -*- c-basic-offset: 2 -*- */ -/* Copyright(C) 2010-2013 Brazil +/* Copyright(C) 2010-2014 Brazil This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public @@ -251,6 +251,9 @@ grn_proc_inspect(grn_ctx *ctx, grn_obj *buf, grn_obj *obj) case GRN_PROC_NORMALIZER : GRN_TEXT_PUTS(ctx, buf, "normalizer"); break; + case GRN_PROC_TOKEN_FILTER : + GRN_TEXT_PUTS(ctx, buf, "token-filter"); + break; } GRN_TEXT_PUTS(ctx, buf, " "); @@ -294,7 +297,7 @@ grn_vector_inspect(grn_ctx *ctx, 
grn_obj *buffer, grn_obj *vector) grn_obj value_object; GRN_OBJ_INIT(&value_object, GRN_BULK, GRN_OBJ_DO_SHALLOW_COPY, section->domain); - grn_bulk_write(ctx, &value_object, value_raw, section->length); + GRN_TEXT_SET(ctx, &value_object, value_raw, section->length); grn_inspect(ctx, buffer, &value_object); GRN_OBJ_FIN(ctx, &value_object); } -- cgit v1.2.1