diff options
Diffstat (limited to 'storage/mroonga/vendor/groonga/lib/expr.c')
-rw-r--r-- | storage/mroonga/vendor/groonga/lib/expr.c | 1137 |
1 files changed, 423 insertions, 714 deletions
diff --git a/storage/mroonga/vendor/groonga/lib/expr.c b/storage/mroonga/vendor/groonga/lib/expr.c index ef477eb77ee..f13502a0f14 100644 --- a/storage/mroonga/vendor/groonga/lib/expr.c +++ b/storage/mroonga/vendor/groonga/lib/expr.c @@ -19,12 +19,10 @@ #include "grn_db.h" #include "grn_ctx_impl.h" #include <string.h> -#include <float.h> #include "grn_ii.h" #include "grn_geo.h" #include "grn_expr.h" #include "grn_util.h" -#include "grn_normalizer.h" #include "grn_mrb.h" #include "mrb/mrb_expr.h" @@ -357,7 +355,7 @@ grn_expr_open(grn_ctx *ctx, grn_obj_spec *spec, const uint8_t *p, const uint8_t { grn_expr *expr = NULL; if ((expr = GRN_MALLOCN(grn_expr, 1))) { - int size = 256; + int size = GRN_STACK_SIZE; expr->consts = NULL; expr->nconsts = 0; GRN_TEXT_INIT(&expr->name_buf, 0); @@ -722,6 +720,21 @@ grn_expr_get_var_by_offset(grn_ctx *ctx, grn_obj *expr, unsigned int offset) DFI_PUT(e, type, domain, code); \ } while (0) +static void +grn_expr_append_obj_resolve_const(grn_ctx *ctx, + grn_obj *obj, + grn_id to_domain) +{ + grn_obj dest; + + GRN_OBJ_INIT(&dest, GRN_BULK, 0, to_domain); + if (!grn_obj_cast(ctx, obj, &dest, GRN_FALSE)) { + grn_obj_reinit(ctx, obj, to_domain, 0); + grn_bulk_write(ctx, obj, GRN_BULK_HEAD(&dest), GRN_BULK_VSIZE(&dest)); + } + GRN_OBJ_FIN(ctx, &dest); +} + grn_obj * grn_expr_append_obj(grn_ctx *ctx, grn_obj *expr, grn_obj *obj, grn_operator op, int nargs) { @@ -854,26 +867,14 @@ grn_expr_append_obj(grn_ctx *ctx, grn_obj *expr, grn_obj *obj, grn_operator op, if (CONSTP(y)) { /* todo */ } else { - grn_obj dest; if (xd != yd) { - GRN_OBJ_INIT(&dest, GRN_BULK, 0, yd); - if (!grn_obj_cast(ctx, x, &dest, GRN_FALSE)) { - grn_obj_reinit(ctx, x, yd, 0); - grn_bulk_write(ctx, x, GRN_BULK_HEAD(&dest), GRN_BULK_VSIZE(&dest)); - } - GRN_OBJ_FIN(ctx, &dest); + grn_expr_append_obj_resolve_const(ctx, x, yd); } } } else { if (CONSTP(y)) { - grn_obj dest; if (xd != yd) { - GRN_OBJ_INIT(&dest, GRN_BULK, 0, xd); - if (!grn_obj_cast(ctx, y, &dest, GRN_FALSE)) { - 
grn_obj_reinit(ctx, y, xd, 0); - grn_bulk_write(ctx, y, GRN_BULK_HEAD(&dest), GRN_BULK_VSIZE(&dest)); - } - GRN_OBJ_FIN(ctx, &dest); + grn_expr_append_obj_resolve_const(ctx, y, xd); } } } @@ -908,6 +909,7 @@ grn_expr_append_obj(grn_ctx *ctx, grn_obj *expr, grn_obj *obj, grn_operator op, case GRN_OP_GET_REF : case GRN_OP_ADJUST : case GRN_OP_TERM_EXTRACT : + case GRN_OP_REGEXP : PUSH_CODE(e, op, obj, nargs, code); if (nargs) { int i = nargs - 1; @@ -2223,274 +2225,6 @@ grn_proc_call(grn_ctx *ctx, grn_obj *proc, int nargs, grn_obj *caller) } \ } while (0) -static grn_bool -string_is_contained(grn_ctx *ctx, - const char *text, unsigned int text_len, - const char *sub_text, unsigned int sub_text_len) -{ - /* TODO: Use more fast algorithm such as Boyer-Moore algorithm that - * is used in snip.c. */ - const char *text_end = text + text_len; - unsigned int sub_text_current = 0; - - for (; text < text_end; text++) { - if (text[0] == sub_text[sub_text_current]) { - sub_text_current++; - if (sub_text_current == sub_text_len) { - return GRN_TRUE; - } - } else { - sub_text_current = 0; - } - } - - return GRN_FALSE; -} - -static grn_bool -pseudo_query_scan_raw_text_raw_text(grn_ctx *ctx, - const char *x, unsigned int x_len, - const char *y, unsigned int y_len) -{ - grn_obj *normalizer; - grn_obj *norm_x; - grn_obj *norm_y; - const char *norm_x_raw; - const char *norm_y_raw; - unsigned int norm_x_raw_length_in_bytes; - unsigned int norm_y_raw_length_in_bytes; - grn_bool matched = GRN_FALSE; - - if (x_len == 0 || y_len == 0) { - return GRN_FALSE; - } - - normalizer = grn_ctx_get(ctx, GRN_NORMALIZER_AUTO_NAME, -1); - norm_x = grn_string_open(ctx, x, x_len, normalizer, 0); - norm_y = grn_string_open(ctx, y, y_len, normalizer, 0); - grn_string_get_normalized(ctx, norm_x, - &norm_x_raw, &norm_x_raw_length_in_bytes, - NULL); - grn_string_get_normalized(ctx, norm_y, - &norm_y_raw, &norm_y_raw_length_in_bytes, - NULL); - matched = string_is_contained(ctx, - norm_x_raw, 
norm_x_raw_length_in_bytes, - norm_y_raw, norm_y_raw_length_in_bytes); - - grn_obj_close(ctx, norm_x); - grn_obj_close(ctx, norm_y); - grn_obj_unlink(ctx, normalizer); - - return matched; -} - -static grn_bool -pseudo_query_scan_record_text(grn_ctx *ctx, grn_obj *record, grn_obj *table, - grn_obj *y) -{ - grn_obj *normalizer; - char x_key[GRN_TABLE_MAX_KEY_SIZE]; - int x_key_len; - grn_bool matched = GRN_FALSE; - - if (table->header.domain != GRN_DB_SHORT_TEXT) { - return GRN_FALSE; - } - - x_key_len = grn_table_get_key(ctx, table, GRN_RECORD_VALUE(record), - x_key, GRN_TABLE_MAX_KEY_SIZE); - grn_table_get_info(ctx, table, NULL, NULL, NULL, &normalizer, NULL); - if (normalizer) { - grn_obj *norm_y; - const char *norm_y_raw; - unsigned int norm_y_raw_length_in_bytes; - norm_y = grn_string_open(ctx, GRN_TEXT_VALUE(y), GRN_TEXT_LEN(y), - normalizer, 0); - grn_string_get_normalized(ctx, norm_y, - &norm_y_raw, &norm_y_raw_length_in_bytes, - NULL); - matched = string_is_contained(ctx, - x_key, x_key_len, - norm_y_raw, norm_y_raw_length_in_bytes); - grn_obj_close(ctx, norm_y); - } else { - matched = pseudo_query_scan_raw_text_raw_text(ctx, - x_key, - x_key_len, - GRN_TEXT_VALUE(y), - GRN_TEXT_LEN(y)); - } - - return matched; -} - -static grn_bool -pseudo_query_scan_text_text(grn_ctx *ctx, grn_obj *x, grn_obj *y) -{ - return pseudo_query_scan_raw_text_raw_text(ctx, - GRN_TEXT_VALUE(x), - GRN_TEXT_LEN(x), - GRN_TEXT_VALUE(y), - GRN_TEXT_LEN(y)); -} - -static grn_bool -pseudo_query_scan(grn_ctx *ctx, grn_obj *x, grn_obj *y) -{ - switch (x->header.domain) { - case GRN_DB_SHORT_TEXT : - case GRN_DB_TEXT : - case GRN_DB_LONG_TEXT : - switch (y->header.domain) { - case GRN_DB_SHORT_TEXT : - case GRN_DB_TEXT : - case GRN_DB_LONG_TEXT : - return pseudo_query_scan_text_text(ctx, x, y); - default : - break; - } - return GRN_FALSE; - default: - { - grn_obj *domain; - domain = grn_ctx_at(ctx, x->header.domain); - if (GRN_OBJ_TABLEP(domain)) { - switch (y->header.domain) { - case 
GRN_DB_SHORT_TEXT : - case GRN_DB_TEXT : - case GRN_DB_LONG_TEXT : - return pseudo_query_scan_record_text(ctx, x, domain, y); - default : - break; - } - } - } - return GRN_FALSE; - } -} - -static grn_bool -pseudo_prefix_search_match(grn_ctx *ctx, - const char *x, unsigned int x_len, - const char *y, unsigned int y_len) -{ - return (x_len >= y_len && strncmp(x, y, y_len) == 0); -} - -static grn_bool -pseudo_prefix_search_raw_text_raw_text(grn_ctx *ctx, - const char *x, unsigned int x_len, - const char *y, unsigned int y_len) -{ - grn_obj *normalizer; - grn_obj *norm_x; - grn_obj *norm_y; - const char *norm_x_raw; - const char *norm_y_raw; - unsigned int norm_x_raw_len; - unsigned int norm_y_raw_len; - grn_bool matched = GRN_FALSE; - - normalizer = grn_ctx_get(ctx, GRN_NORMALIZER_AUTO_NAME, -1); - norm_x = grn_string_open(ctx, x, x_len, normalizer, 0); - norm_y = grn_string_open(ctx, y, y_len, normalizer, 0); - grn_string_get_normalized(ctx, norm_x, &norm_x_raw, &norm_x_raw_len, NULL); - grn_string_get_normalized(ctx, norm_y, &norm_y_raw, &norm_y_raw_len, NULL); - matched = pseudo_prefix_search_match(ctx, - norm_x_raw, norm_x_raw_len, - norm_y_raw, norm_y_raw_len); - - grn_obj_close(ctx, norm_x); - grn_obj_close(ctx, norm_y); - grn_obj_unlink(ctx, normalizer); - - return matched; -} - -static grn_bool -pseudo_prefix_search_record_text(grn_ctx *ctx, grn_obj *record, grn_obj *table, - grn_obj *y) -{ - grn_obj *normalizer; - char x_key[GRN_TABLE_MAX_KEY_SIZE]; - int x_key_len; - grn_bool matched = GRN_FALSE; - - if (table->header.domain != GRN_DB_SHORT_TEXT) { - return GRN_FALSE; - } - - x_key_len = grn_table_get_key(ctx, table, GRN_RECORD_VALUE(record), - x_key, GRN_TABLE_MAX_KEY_SIZE); - grn_table_get_info(ctx, table, NULL, NULL, NULL, &normalizer, NULL); - if (normalizer) { - grn_obj *norm_y; - const char *norm_y_raw; - unsigned int norm_y_raw_len; - norm_y = grn_string_open(ctx, GRN_TEXT_VALUE(y), GRN_TEXT_LEN(y), - normalizer, 0); - grn_string_get_normalized(ctx, 
norm_y, &norm_y_raw, &norm_y_raw_len, NULL); - matched = pseudo_prefix_search_match(ctx, - x_key, x_key_len, - norm_y_raw, norm_y_raw_len); - grn_obj_close(ctx, norm_y); - } else { - matched = pseudo_prefix_search_raw_text_raw_text(ctx, - x_key, - x_key_len, - GRN_TEXT_VALUE(y), - GRN_TEXT_LEN(y)); - } - - return matched; -} - -static grn_bool -pseudo_prefix_search_text_text(grn_ctx *ctx, grn_obj *x, grn_obj *y) -{ - return pseudo_prefix_search_raw_text_raw_text(ctx, - GRN_TEXT_VALUE(x), - GRN_TEXT_LEN(x), - GRN_TEXT_VALUE(y), - GRN_TEXT_LEN(y)); -} - -static grn_bool -pseudo_prefix_search(grn_ctx *ctx, grn_obj *x, grn_obj *y) -{ - switch (x->header.domain) { - case GRN_DB_SHORT_TEXT : - case GRN_DB_TEXT : - case GRN_DB_LONG_TEXT : - switch (y->header.domain) { - case GRN_DB_SHORT_TEXT : - case GRN_DB_TEXT : - case GRN_DB_LONG_TEXT : - return pseudo_prefix_search_text_text(ctx, x, y); - default : - break; - } - return GRN_FALSE; - default: - { - grn_obj *domain; - domain = grn_ctx_at(ctx, x->header.domain); - if (GRN_OBJ_TABLEP(domain)) { - switch (y->header.domain) { - case GRN_DB_SHORT_TEXT : - case GRN_DB_TEXT : - case GRN_DB_LONG_TEXT : - return pseudo_prefix_search_record_text(ctx, x, domain, y); - default : - break; - } - } - } - return GRN_FALSE; - } -} - inline static void grn_expr_exec_get_member(grn_ctx *ctx, grn_obj *expr, @@ -2534,12 +2268,45 @@ grn_expr_exec_get_member(grn_ctx *ctx, GRN_OBJ_FIN(ctx, &values); } +static inline grn_bool +grn_expr_exec_is_simple_expr(grn_ctx *ctx, grn_obj *expr) +{ + grn_expr *e = (grn_expr *)expr; + + if (expr->header.type != GRN_EXPR) { + return GRN_FALSE; + } + + if (e->codes_curr != 1) { + return GRN_FALSE; + } + + switch (e->codes[0].op) { + case GRN_OP_PUSH : + return GRN_TRUE; + default : + return GRN_FALSE; + } +} + +static inline grn_obj * +grn_expr_exec_simple(grn_ctx *ctx, grn_obj *expr) +{ + grn_expr *e = (grn_expr *)expr; + + return e->codes[0].value; +} + grn_obj * grn_expr_exec(grn_ctx *ctx, grn_obj *expr, 
int nargs) { grn_obj *val = NULL; uint32_t stack_curr = ctx->impl->stack_curr; GRN_API_ENTER; + if (grn_expr_exec_is_simple_expr(ctx, expr)) { + val = grn_expr_exec_simple(ctx, expr); + GRN_API_RETURN(val); + } if (expr->header.type == GRN_PROC) { grn_proc *proc = (grn_proc *)expr; if (proc->type == GRN_PROC_COMMAND) { @@ -3170,7 +2937,7 @@ grn_expr_exec(grn_ctx *ctx, grn_obj *expr, int nargs) POP1(y); POP1(x); WITH_SPSAVE({ - matched = pseudo_query_scan(ctx, x, y); + matched = grn_operator_exec_match(ctx, x, y); }); ALLOC1(res); grn_obj_reinit(ctx, res, GRN_DB_INT32, 0); @@ -3207,7 +2974,7 @@ grn_expr_exec(grn_ctx *ctx, grn_obj *expr, int nargs) POP1(y); POP1(x); WITH_SPSAVE({ - matched = pseudo_prefix_search(ctx, x, y); + matched = grn_operator_exec_prefix(ctx, x, y); }); ALLOC1(res); grn_obj_reinit(ctx, res, GRN_DB_INT32, 0); @@ -3697,6 +3464,21 @@ grn_expr_exec(grn_ctx *ctx, grn_obj *expr, int nargs) code++; } break; + case GRN_OP_REGEXP : + { + grn_obj *target, *pattern; + grn_bool matched; + POP1(pattern); + POP1(target); + WITH_SPSAVE({ + matched = grn_operator_exec_regexp(ctx, target, pattern); + }); + ALLOC1(res); + grn_obj_reinit(ctx, res, GRN_DB_BOOL, 0); + GRN_BOOL_SET(ctx, res, matched); + } + code++; + break; default : ERR(GRN_FUNCTION_NOT_IMPLEMENTED, "not implemented operator assigned"); goto exit; @@ -3753,12 +3535,17 @@ struct _grn_scan_info { grn_obj *args[GRN_SCAN_INFO_MAX_N_ARGS]; int max_interval; int similarity_threshold; - grn_obj *scorer; + grn_obj scorers; + grn_obj scorer_args_exprs; + grn_obj scorer_args_expr_offsets; }; #define SI_FREE(si) do {\ GRN_OBJ_FIN(ctx, &(si)->wv);\ GRN_OBJ_FIN(ctx, &(si)->index);\ + GRN_OBJ_FIN(ctx, &(si)->scorers);\ + GRN_OBJ_FIN(ctx, &(si)->scorer_args_exprs);\ + GRN_OBJ_FIN(ctx, &(si)->scorer_args_expr_offsets);\ GRN_FREE(si);\ } while (0) @@ -3777,7 +3564,9 @@ struct _grn_scan_info { (si)->max_interval = DEFAULT_MAX_INTERVAL;\ (si)->similarity_threshold = DEFAULT_SIMILARITY_THRESHOLD;\ (si)->start = (st);\ 
- (si)->scorer = NULL;\ + GRN_PTR_INIT(&(si)->scorers, GRN_OBJ_VECTOR, GRN_ID_NIL);\ + GRN_PTR_INIT(&(si)->scorer_args_exprs, GRN_OBJ_VECTOR, GRN_ID_NIL);\ + GRN_UINT32_INIT(&(si)->scorer_args_expr_offsets, GRN_OBJ_VECTOR);\ } while (0) static scan_info ** @@ -3821,9 +3610,9 @@ put_logical_op(grn_ctx *ctx, scan_info **sis, int *ip, grn_operator op, int star } else { s_->flags &= ~SCAN_PUSH; s_->logical_op = op; - memcpy(&sis[i], &sis[j], sizeof(scan_info *) * (r - j)); - memmove(&sis[j], &sis[r], sizeof(scan_info *) * (i - r)); - memcpy(&sis[i + j - r], &sis[i], sizeof(scan_info *) * (r - j)); + grn_memcpy(&sis[i], &sis[j], sizeof(scan_info *) * (r - j)); + grn_memmove(&sis[j], &sis[r], sizeof(scan_info *) * (i - r)); + grn_memcpy(&sis[i + j - r], &sis[i], sizeof(scan_info *) * (r - j)); } break; } @@ -4003,11 +3792,18 @@ grn_expr_inspect_internal(grn_ctx *ctx, grn_obj *buf, grn_obj *expr) static void -scan_info_put_index(grn_ctx *ctx, scan_info *si, grn_obj *index, uint32_t sid, int32_t weight) +scan_info_put_index(grn_ctx *ctx, scan_info *si, + grn_obj *index, uint32_t sid, int32_t weight, + grn_obj *scorer, + grn_obj *scorer_args_expr, + uint32_t scorer_args_expr_offset) { GRN_PTR_PUT(ctx, &si->index, index); GRN_UINT32_PUT(ctx, &si->wv, sid); GRN_INT32_PUT(ctx, &si->wv, weight); + GRN_PTR_PUT(ctx, &si->scorers, scorer); + GRN_PTR_PUT(ctx, &si->scorer_args_exprs, scorer_args_expr); + GRN_UINT32_PUT(ctx, &si->scorer_args_expr_offsets, scorer_args_expr_offset); { int i, ni = (GRN_BULK_VSIZE(&si->index) / sizeof(grn_obj *)) - 1; grn_obj **pi = &GRN_PTR_VALUE_AT(&si->index, ni); @@ -4015,10 +3811,10 @@ scan_info_put_index(grn_ctx *ctx, scan_info *si, grn_obj *index, uint32_t sid, i if (index == pi[-1]) { if (i) { int32_t *pw = &GRN_INT32_VALUE_AT(&si->wv, (ni - i) * 2); - memmove(pw + 2, pw, sizeof(int32_t) * 2 * i); + grn_memmove(pw + 2, pw, sizeof(int32_t) * 2 * i); pw[0] = (int32_t) sid; pw[1] = weight; - memmove(pi + 1, pi, sizeof(grn_obj *) * i); + 
grn_memmove(pi + 1, pi, sizeof(grn_obj *) * i); pi[0] = index; } return; @@ -4028,10 +3824,13 @@ scan_info_put_index(grn_ctx *ctx, scan_info *si, grn_obj *index, uint32_t sid, i } static int32_t -get_weight(grn_ctx *ctx, grn_expr_code *ec) +get_weight(grn_ctx *ctx, grn_expr_code *ec, uint32_t *offset) { if (ec->modify == 2 && ec[2].op == GRN_OP_STAR && ec[1].value && ec[1].value->header.type == GRN_BULK) { + if (offset) { + *offset = 2; + } if (ec[1].value->header.domain == GRN_DB_INT32 || ec[1].value->header.domain == GRN_DB_UINT32) { return GRN_INT32_VALUE(ec[1].value); @@ -4046,6 +3845,9 @@ get_weight(grn_ctx *ctx, grn_expr_code *ec) return weight; } } else { + if (offset) { + *offset = 0; + } return 1; } } @@ -4067,7 +3869,9 @@ grn_scan_info_open(grn_ctx *ctx, int start) si->max_interval = DEFAULT_MAX_INTERVAL; si->similarity_threshold = DEFAULT_SIMILARITY_THRESHOLD; si->start = start; - si->scorer = NULL; + GRN_PTR_INIT(&si->scorers, GRN_OBJ_VECTOR, GRN_ID_NIL); + GRN_PTR_INIT(&si->scorer_args_exprs, GRN_OBJ_VECTOR, GRN_ID_NIL); + GRN_UINT32_INIT(&si->scorer_args_expr_offsets, GRN_OBJ_VECTOR); return si; } @@ -4079,9 +3883,16 @@ grn_scan_info_close(grn_ctx *ctx, scan_info *si) } void -grn_scan_info_put_index(grn_ctx *ctx, scan_info *si, grn_obj *index, uint32_t sid, int32_t weight) +grn_scan_info_put_index(grn_ctx *ctx, scan_info *si, + grn_obj *index, uint32_t sid, int32_t weight, + grn_obj *scorer, + grn_obj *scorer_args_expr, + uint32_t scorer_args_expr_offset) { - scan_info_put_index(ctx, si, index, sid, weight); + scan_info_put_index(ctx, si, index, sid, weight, + scorer, + scorer_args_expr, + scorer_args_expr_offset); } scan_info ** @@ -4092,9 +3903,9 @@ grn_scan_info_put_logical_op(grn_ctx *ctx, scan_info **sis, int *ip, } int32_t -grn_expr_code_get_weight(grn_ctx *ctx, grn_expr_code *ec) +grn_expr_code_get_weight(grn_ctx *ctx, grn_expr_code *ec, uint32_t *offset) { - return get_weight(ctx, ec); + return get_weight(ctx, ec, offset); } int @@ -4169,18 
+3980,6 @@ grn_scan_info_set_similarity_threshold(scan_info *si, int similarity_threshold) si->similarity_threshold = similarity_threshold; } -grn_obj * -grn_scan_info_get_scorer(scan_info *si) -{ - return si->scorer; -} - -void -grn_scan_info_set_scorer(scan_info *si, grn_obj *scorer) -{ - si->scorer = scorer; -} - grn_bool grn_scan_info_push_arg(scan_info *si, grn_obj *arg) { @@ -4202,32 +4001,64 @@ grn_scan_info_get_arg(grn_ctx *ctx, scan_info *si, int i) } static uint32_t -scan_info_build_find_index_column_index(grn_ctx *ctx, - scan_info *si, - grn_expr_code *ec, - uint32_t n_rest_codes, - grn_operator op) +scan_info_build_match_expr_codes_find_index(grn_ctx *ctx, scan_info *si, + grn_expr *expr, uint32_t i, + grn_obj **index, + int *sid) { - uint32_t offset = 0; - grn_obj *index; - int sid = 0; - int32_t weight; - - index = ec->value; - if (n_rest_codes >= 2 && - ec[1].value && - (ec[1].value->header.domain == GRN_DB_INT32 || - ec[1].value->header.domain == GRN_DB_UINT32) && - ec[2].op == GRN_OP_GET_MEMBER) { - if (ec[1].value->header.domain == GRN_DB_INT32) { - sid = GRN_INT32_VALUE(ec[1].value) + 1; - } else { - sid = GRN_UINT32_VALUE(ec[1].value) + 1; + grn_expr_code *ec; + uint32_t offset = 1; + grn_index_datum index_datum; + unsigned int n_index_data = 0; + + ec = &(expr->codes[i]); + switch (ec->value->header.type) { + case GRN_ACCESSOR : + n_index_data = grn_column_find_index_data(ctx, ec->value, si->op, + &index_datum, 1); + if (n_index_data > 0) { + grn_accessor *a = (grn_accessor *)(ec->value); + *sid = index_datum.section; + if (a->next && a->obj != index_datum.index) { + *index = ec->value; + } else { + *index = index_datum.index; + } + } + break; + case GRN_COLUMN_FIX_SIZE : + case GRN_COLUMN_VAR_SIZE : + n_index_data = grn_column_find_index_data(ctx, ec->value, si->op, + &index_datum, 1); + if (n_index_data > 0) { + *index = index_datum.index; + *sid = index_datum.section; + } + break; + case GRN_COLUMN_INDEX : + { + uint32_t n_rest_codes; + + 
*index = ec->value; + + n_rest_codes = expr->codes_curr - i; + if (n_rest_codes >= 2 && + ec[1].value && + (ec[1].value->header.domain == GRN_DB_INT32 || + ec[1].value->header.domain == GRN_DB_UINT32) && + ec[2].op == GRN_OP_GET_MEMBER) { + if (ec[1].value->header.domain == GRN_DB_INT32) { + *sid = GRN_INT32_VALUE(ec[1].value) + 1; + } else { + *sid = GRN_UINT32_VALUE(ec[1].value) + 1; + } + offset += 2; + } } - offset = 2; + break; + default : + break; } - weight = get_weight(ctx, ec + offset); - scan_info_put_index(ctx, si, index, sid, weight); return offset; } @@ -4237,8 +4068,9 @@ scan_info_build_match_expr_codes(grn_ctx *ctx, scan_info *si, grn_expr *expr, uint32_t i) { grn_expr_code *ec; - grn_obj *index; - int sid; + grn_obj *index = NULL; + int sid = 0; + uint32_t offset = 0; ec = &(expr->codes[i]); if (!ec->value) { @@ -4247,29 +4079,19 @@ scan_info_build_match_expr_codes(grn_ctx *ctx, scan_info *si, switch (ec->value->header.type) { case GRN_ACCESSOR : - if (grn_column_index(ctx, ec->value, si->op, &index, 1, &sid)) { - int32_t weight = get_weight(ctx, ec); - si->flags |= SCAN_ACCESSOR; - if (((grn_accessor *)ec->value)->next) { - scan_info_put_index(ctx, si, ec->value, sid, weight); - } else { - scan_info_put_index(ctx, si, index, sid, weight); - } - } - break; case GRN_COLUMN_FIX_SIZE : case GRN_COLUMN_VAR_SIZE : - if (grn_column_index(ctx, ec->value, si->op, &index, 1, &sid)) { - scan_info_put_index(ctx, si, index, sid, get_weight(ctx, ec)); - } - break; case GRN_COLUMN_INDEX : - { - uint32_t n_rest_codes; - uint32_t offset; - n_rest_codes = expr->codes_curr - i; - offset = scan_info_build_find_index_column_index(ctx, si, ec, - n_rest_codes, si->op); + offset = scan_info_build_match_expr_codes_find_index(ctx, si, expr, i, + &index, &sid); + i += offset - 1; + if (index) { + if (ec->value->header.type == GRN_ACCESSOR) { + si->flags |= SCAN_ACCESSOR; + } + scan_info_put_index(ctx, si, index, sid, + get_weight(ctx, &(expr->codes[i]), &offset), + NULL, 
NULL, 0); i += offset; } break; @@ -4285,24 +4107,27 @@ scan_info_build_match_expr_codes(grn_ctx *ctx, scan_info *si, GRN_OBJ_FIN(ctx, &inspected); return expr->codes_curr; } - si->scorer = ec->value; - i = scan_info_build_match_expr_codes(ctx, si, expr, i + 1); - if (expr->codes[i].op != GRN_OP_CALL) { - grn_obj inspected; - GRN_TEXT_INIT(&inspected, 0); - grn_inspect(ctx, &inspected, si->scorer); - ERR(GRN_INVALID_ARGUMENT, - "scorer must have only one argument: <%.*s>", - (int)GRN_TEXT_LEN(&inspected), - GRN_TEXT_VALUE(&inspected)); - GRN_OBJ_FIN(ctx, &inspected); - return expr->codes_curr; + i++; + offset = scan_info_build_match_expr_codes_find_index(ctx, si, expr, i, + &index, &sid); + i += offset; + if (index) { + uint32_t scorer_args_expr_offset = 0; + if (expr->codes[i].op != GRN_OP_CALL) { + scorer_args_expr_offset = i; + } + while (i < expr->codes_curr && expr->codes[i].op != GRN_OP_CALL) { + i++; + } + scan_info_put_index(ctx, si, index, sid, + get_weight(ctx, &(expr->codes[i]), &offset), + ec->value, + (grn_obj *)expr, + scorer_args_expr_offset); + i += offset; } break; - case GRN_TABLE_NO_KEY : - case GRN_TABLE_HASH_KEY : - case GRN_TABLE_PAT_KEY : - case GRN_TABLE_DAT_KEY : + default : { char name[GRN_TABLE_MAX_KEY_SIZE]; int name_size; @@ -4328,26 +4153,144 @@ scan_info_build_match_expr(grn_ctx *ctx, scan_info *si, grn_expr *expr) } } +static grn_bool +is_index_searchable_regexp(grn_ctx *ctx, grn_obj *regexp) +{ + const char *regexp_raw; + const char *regexp_raw_end; + grn_bool escaping = GRN_FALSE; + + if (!(regexp->header.domain == GRN_DB_SHORT_TEXT || + regexp->header.domain == GRN_DB_TEXT || + regexp->header.domain == GRN_DB_LONG_TEXT)) { + return GRN_FALSE; + } + + regexp_raw = GRN_TEXT_VALUE(regexp); + regexp_raw_end = regexp_raw + GRN_TEXT_LEN(regexp); + + while (regexp_raw < regexp_raw_end) { + unsigned int char_len; + + char_len = grn_charlen(ctx, regexp_raw, regexp_raw_end); + if (char_len == 0) { + return GRN_FALSE; + } + + if (char_len == 
1) { + if (escaping) { + escaping = GRN_FALSE; + switch (regexp_raw[0]) { + case 'Z' : + case 'b' : + case 'B' : + case 'd' : + case 'D' : + case 'h' : + case 'H' : + case 'p' : + case 's' : + case 'S' : + case 'w' : + case 'W' : + case 'X' : + case 'k' : + case 'g' : + case '1' : + case '2' : + case '3' : + case '4' : + case '5' : + case '6' : + case '7' : + case '8' : + case '9' : + return GRN_FALSE; + default : + break; + } + } else { + switch (regexp_raw[0]) { + case '.' : + case '[' : + case ']' : + case '|' : + case '?' : + case '+' : + case '*' : + case '{' : + case '}' : + case '^' : + case '$' : + case '(' : + case ')' : + escaping = GRN_FALSE; + return GRN_FALSE; + case '\\' : + escaping = GRN_TRUE; + break; + default : + escaping = GRN_FALSE; + break; + } + } + } else { + escaping = GRN_FALSE; + } + + regexp_raw += char_len; + } + + return GRN_TRUE; +} + static void scan_info_build_match(grn_ctx *ctx, scan_info *si) { - int sid; - grn_obj *index, **p = si->args, **pe = si->args + si->nargs; + grn_obj **p, **pe; + + if (si->op == GRN_OP_REGEXP) { + p = si->args; + pe = si->args + si->nargs; + for (; p < pe; p++) { + if ((*p)->header.type == GRN_BULK && + !is_index_searchable_regexp(ctx, *p)) { + return; + } + } + } + + p = si->args; + pe = si->args + si->nargs; for (; p < pe; p++) { if ((*p)->header.type == GRN_EXPR) { scan_info_build_match_expr(ctx, si, (grn_expr *)(*p)); } else if (GRN_DB_OBJP(*p)) { - if (grn_column_index(ctx, *p, si->op, &index, 1, &sid)) { - scan_info_put_index(ctx, si, index, sid, 1); + grn_index_datum index_datum; + unsigned int n_index_data; + n_index_data = grn_column_find_index_data(ctx, *p, si->op, + &index_datum, 1); + if (n_index_data > 0) { + scan_info_put_index(ctx, si, + index_datum.index, index_datum.section, 1, + NULL, NULL, 0); } } else if (GRN_ACCESSORP(*p)) { + grn_index_datum index_datum; + unsigned int n_index_data; si->flags |= SCAN_ACCESSOR; - if (grn_column_index(ctx, *p, si->op, &index, 1, &sid)) { + 
n_index_data = grn_column_find_index_data(ctx, *p, si->op, + &index_datum, 1); + if (n_index_data > 0) { + grn_obj *index; if (((grn_accessor *)(*p))->next) { - scan_info_put_index(ctx, si, *p, sid, 1); + index = *p; } else { - scan_info_put_index(ctx, si, index, sid, 1); + index = index_datum.index; } + scan_info_put_index(ctx, si, + index, index_datum.section, 1, + NULL, NULL, 0); } } else { switch (si->op) { @@ -4412,6 +4355,7 @@ scan_info_build(grn_ctx *ctx, grn_obj *expr, int *n, case GRN_OP_GEO_WITHINP6 : case GRN_OP_GEO_WITHINP8 : case GRN_OP_TERM_EXTRACT : + case GRN_OP_REGEXP : if (stat < SCAN_COL1 || SCAN_CONST < stat) { return NULL; } stat = SCAN_START; m++; @@ -4492,6 +4436,7 @@ scan_info_build(grn_ctx *ctx, grn_obj *expr, int *n, case GRN_OP_GEO_WITHINP6 : case GRN_OP_GEO_WITHINP8 : case GRN_OP_TERM_EXTRACT : + case GRN_OP_REGEXP : stat = SCAN_START; si->op = c->op; si->end = c - e->codes; @@ -4572,17 +4517,28 @@ scan_info_build(grn_ctx *ctx, grn_obj *expr, int *n, sis[i++] = si; /* better index resolving framework for functions should be implemented */ { - int sid; - grn_obj *index, **p = si->args, **pe = si->args + si->nargs; + grn_obj **p = si->args, **pe = si->args + si->nargs; for (; p < pe; p++) { if (GRN_DB_OBJP(*p)) { - if (grn_column_index(ctx, *p, c->op, &index, 1, &sid)) { - scan_info_put_index(ctx, si, index, sid, 1); + grn_index_datum index_datum; + unsigned int n_index_data; + n_index_data = grn_column_find_index_data(ctx, *p, c->op, + &index_datum, 1); + if (n_index_data > 0) { + scan_info_put_index(ctx, si, + index_datum.index, index_datum.section, 1, + NULL, NULL, 0); } } else if (GRN_ACCESSORP(*p)) { + grn_index_datum index_datum; + unsigned int n_index_data; si->flags |= SCAN_ACCESSOR; - if (grn_column_index(ctx, *p, c->op, &index, 1, &sid)) { - scan_info_put_index(ctx, si, index, sid, 1); + n_index_data = grn_column_find_index_data(ctx, *p, c->op, + &index_datum, 1); + if (n_index_data > 0) { + scan_info_put_index(ctx, si, + 
index_datum.index, index_datum.section, 1, + NULL, NULL, 0); } } else { si->query = *p; @@ -4924,10 +4880,18 @@ grn_table_select_index_range_accessor(grn_ctx *ctx, grn_obj *table, accessor = (grn_accessor *)GRN_PTR_VALUE_AT(accessor_stack, i - 1); target = accessor->obj; - if (grn_column_index(ctx, target, GRN_OP_EQUAL, &index, 1, &section) == 0) { - grn_obj_unlink(ctx, current_res); - current_res = NULL; - break; + { + grn_index_datum index_datum; + unsigned int n_index_data; + n_index_data = grn_column_find_index_data(ctx, target, GRN_OP_EQUAL, + &index_datum, 1); + if (n_index_data == 0) { + grn_obj_unlink(ctx, current_res); + current_res = NULL; + break; + } + index = index_datum.index; + section = index_datum.section; } if (section > 0) { @@ -5100,7 +5064,10 @@ grn_table_select_index(grn_ctx *ctx, grn_obj *table, scan_info *si, GRN_BULK_HEAD(si->query), GRN_BULK_VSIZE(si->query)); } - grn_ii_at(ctx, (grn_ii *)index, tid, (grn_hash *)res, si->logical_op); + if (tid != GRN_ID_NIL) { + grn_ii_at(ctx, (grn_ii *)index, tid, (grn_hash *)res, + si->logical_op); + } } grn_ii_resolve_sel_and(ctx, (grn_hash *)res, si->logical_op); processed = GRN_TRUE; @@ -5188,9 +5155,11 @@ grn_table_select_index(grn_ctx *ctx, grn_obj *table, scan_info *si, case GRN_OP_NEAR : case GRN_OP_NEAR2 : case GRN_OP_SIMILAR : + case GRN_OP_REGEXP : { grn_obj wv, **ip = &GRN_PTR_VALUE(&si->index); - int j = GRN_BULK_VSIZE(&si->index)/sizeof(grn_obj *); + int j; + int n_indexes = GRN_BULK_VSIZE(&si->index)/sizeof(grn_obj *); int32_t *wp = &GRN_INT32_VALUE(&si->wv); grn_search_optarg optarg; GRN_INT32_INIT(&wv, GRN_OBJ_VECTOR); @@ -5217,21 +5186,31 @@ grn_table_select_index(grn_ctx *ctx, grn_obj *table, scan_info *si, optarg.vector_size = 1; optarg.proc = NULL; optarg.max_size = 0; - optarg.scorer = si->scorer; ctx->flags |= GRN_CTX_TEMPORARY_DISABLE_II_RESOLVE_SEL_AND; - for (; j--; ip++, wp += 2) { + for (j = 0; j < n_indexes; j++, ip++, wp += 2) { uint32_t sid = (uint32_t) wp[0]; int32_t weight = 
wp[1]; if (sid) { int weight_index = sid - 1; - GRN_INT32_SET_AT(ctx, &wv, weight_index, weight); + int current_vector_size; + current_vector_size = GRN_BULK_VSIZE(&wv)/sizeof(int32_t); + if (weight_index < current_vector_size) { + ((int *)GRN_BULK_HEAD(&wv))[weight_index] = weight; + } else { + GRN_INT32_SET_AT(ctx, &wv, weight_index, weight); + } optarg.weight_vector = &GRN_INT32_VALUE(&wv); optarg.vector_size = GRN_BULK_VSIZE(&wv)/sizeof(int32_t); } else { optarg.weight_vector = NULL; optarg.vector_size = weight; } - if (j) { + optarg.scorer = GRN_PTR_VALUE_AT(&(si->scorers), j); + optarg.scorer_args_expr = + GRN_PTR_VALUE_AT(&(si->scorer_args_exprs), j); + optarg.scorer_args_expr_offset = + GRN_UINT32_VALUE_AT(&(si->scorer_args_expr_offsets), j); + if (j < n_indexes - 1) { if (sid && ip[0] == ip[1]) { continue; } } else { ctx->flags &= ~GRN_CTX_TEMPORARY_DISABLE_II_RESOLVE_SEL_AND; @@ -5475,239 +5454,6 @@ skip_space(grn_ctx *ctx, efs_info *q) } } -static grn_rc get_expr(grn_ctx *ctx, efs_info *q, grn_obj *column, grn_operator mode); -static grn_rc get_token(grn_ctx *ctx, efs_info *q, efs_op *op, grn_obj *column, grn_operator mode); - -static grn_rc -get_phrase(grn_ctx *ctx, efs_info *q, grn_obj *column, int mode, int option) -{ - const char *start, *s; - start = s = q->cur; - GRN_BULK_REWIND(&q->buf); - while (1) { - unsigned int len; - if (s >= q->str_end) { - q->cur = s; - break; - } - len = grn_charlen(ctx, s, q->str_end); - if (len == 0) { - /* invalid string containing malformed multibyte char */ - return GRN_END_OF_DATA; - } else if (len == 1) { - if (*s == GRN_QUERY_QUOTER) { - q->cur = s + 1; - break; - } else if (*s == GRN_QUERY_ESCAPE && s + 1 < q->str_end) { - s++; - len = grn_charlen(ctx, s, q->str_end); - } - } - GRN_TEXT_PUT(ctx, &q->buf, s, len); - s += len; - } - grn_expr_append_obj(ctx, q->e, q->v, GRN_OP_PUSH, 1); - grn_expr_append_const(ctx, q->e, column, GRN_OP_PUSH, 1); - grn_expr_append_op(ctx, q->e, GRN_OP_GET_VALUE, 2); - 
grn_expr_append_const(ctx, q->e, &q->buf, GRN_OP_PUSH, 1); - if (mode == GRN_OP_MATCH || mode == GRN_OP_EXACT) { - grn_expr_append_op(ctx, q->e, mode, 2); - } else { - grn_expr_append_const_int(ctx, q->e, option, GRN_OP_PUSH, 1); - grn_expr_append_op(ctx, q->e, mode, 3); - } - return GRN_SUCCESS; -} - -static grn_rc -get_geocond(grn_ctx *ctx, efs_info *q, grn_obj *longitude, grn_obj *latitude) -{ - unsigned int len; - const char *start = q->cur, *end; - for (end = q->cur;; ) { - /* null check and length check */ - if (!(len = grn_charlen(ctx, end, q->str_end))) { - q->cur = q->str_end; - break; - } - if (grn_isspace(end, ctx->encoding) || - *end == GRN_QUERY_PARENR) { - q->cur = end; - break; - } - } - { - const char *tokbuf[8]; - int32_t lng0, lat0, lng1, lat1, lng2, lat2, r; - int32_t n = grn_str_tok((char *)start, end - start, ',', tokbuf, 8, NULL); - switch (n) { - case 3 : - lng0 = grn_atoi(start, tokbuf[0], NULL); - lat0 = grn_atoi(tokbuf[0] + 1, tokbuf[1], NULL); - r = grn_atoi(tokbuf[1] + 1, tokbuf[2], NULL); - grn_expr_append_obj(ctx, q->e, q->v, GRN_OP_PUSH, 1); - grn_expr_append_const(ctx, q->e, longitude, GRN_OP_PUSH, 1); - grn_expr_append_op(ctx, q->e, GRN_OP_GET_VALUE, 2); - grn_expr_append_obj(ctx, q->e, q->v, GRN_OP_PUSH, 1); - grn_expr_append_const(ctx, q->e, latitude, GRN_OP_PUSH, 1); - grn_expr_append_op(ctx, q->e, GRN_OP_GET_VALUE, 2); - grn_expr_append_const_int(ctx, q->e, lng0, GRN_OP_PUSH, 1); - grn_expr_append_const_int(ctx, q->e, lat0, GRN_OP_PUSH, 1); - grn_expr_append_const_int(ctx, q->e, r, GRN_OP_PUSH, 1); - grn_expr_append_op(ctx, q->e, GRN_OP_GEO_WITHINP5, 5); - break; - case 4 : - lng0 = grn_atoi(start, tokbuf[0], NULL); - lat0 = grn_atoi(tokbuf[0] + 1, tokbuf[1], NULL); - lng1 = grn_atoi(tokbuf[1] + 1, tokbuf[2], NULL); - lat1 = grn_atoi(tokbuf[2] + 1, tokbuf[3], NULL); - grn_expr_append_obj(ctx, q->e, q->v, GRN_OP_PUSH, 1); - grn_expr_append_const(ctx, q->e, longitude, GRN_OP_PUSH, 1); - grn_expr_append_op(ctx, q->e, 
GRN_OP_GET_VALUE, 2); - grn_expr_append_obj(ctx, q->e, q->v, GRN_OP_PUSH, 1); - grn_expr_append_const(ctx, q->e, latitude, GRN_OP_PUSH, 1); - grn_expr_append_op(ctx, q->e, GRN_OP_GET_VALUE, 2); - grn_expr_append_const_int(ctx, q->e, lng0, GRN_OP_PUSH, 1); - grn_expr_append_const_int(ctx, q->e, lat0, GRN_OP_PUSH, 1); - grn_expr_append_const_int(ctx, q->e, lng1, GRN_OP_PUSH, 1); - grn_expr_append_const_int(ctx, q->e, lat1, GRN_OP_PUSH, 1); - grn_expr_append_op(ctx, q->e, GRN_OP_GEO_WITHINP6, 6); - break; - case 6 : - lng0 = grn_atoi(start, tokbuf[0], NULL); - lat0 = grn_atoi(tokbuf[0] + 1, tokbuf[1], NULL); - lng1 = grn_atoi(tokbuf[1] + 1, tokbuf[2], NULL); - lat1 = grn_atoi(tokbuf[2] + 1, tokbuf[3], NULL); - lng2 = grn_atoi(tokbuf[3] + 1, tokbuf[4], NULL); - lat2 = grn_atoi(tokbuf[4] + 1, tokbuf[5], NULL); - grn_expr_append_obj(ctx, q->e, q->v, GRN_OP_PUSH, 1); - grn_expr_append_const(ctx, q->e, longitude, GRN_OP_PUSH, 1); - grn_expr_append_op(ctx, q->e, GRN_OP_GET_VALUE, 2); - grn_expr_append_obj(ctx, q->e, q->v, GRN_OP_PUSH, 1); - grn_expr_append_const(ctx, q->e, latitude, GRN_OP_PUSH, 1); - grn_expr_append_op(ctx, q->e, GRN_OP_GET_VALUE, 2); - grn_expr_append_const_int(ctx, q->e, lng0, GRN_OP_PUSH, 1); - grn_expr_append_const_int(ctx, q->e, lat0, GRN_OP_PUSH, 1); - grn_expr_append_const_int(ctx, q->e, lng1, GRN_OP_PUSH, 1); - grn_expr_append_const_int(ctx, q->e, lat1, GRN_OP_PUSH, 1); - grn_expr_append_const_int(ctx, q->e, lng2, GRN_OP_PUSH, 1); - grn_expr_append_const_int(ctx, q->e, lat2, GRN_OP_PUSH, 1); - grn_expr_append_op(ctx, q->e, GRN_OP_GEO_WITHINP8, 8); - break; - default : - ERR(GRN_INVALID_ARGUMENT, "invalid geocond"); - break; - } - } - return ctx->rc; -} - -static grn_rc -get_word(grn_ctx *ctx, efs_info *q, grn_obj *column, int mode, int option) -{ - const char *start = q->cur, *end; - unsigned int len; - for (end = q->cur;; ) { - /* null check and length check */ - if (!(len = grn_charlen(ctx, end, q->str_end))) { - q->cur = q->str_end; - break; - } 
- if (grn_isspace(end, ctx->encoding) || - *end == GRN_QUERY_PARENR) { - q->cur = end; - break; - } - if (*end == GRN_QUERY_COLUMN) { - grn_obj *c = grn_obj_column(ctx, q->table, start, end - start); - if (c && end + 1 < q->str_end) { - efs_op op; - switch (end[1]) { - case '!' : - mode = GRN_OP_NOT_EQUAL; - q->cur = end + 2; - break; - case '=' : - if (q->flags & GRN_EXPR_ALLOW_UPDATE) { - mode = GRN_OP_ASSIGN; - q->cur = end + 2; - } else { - get_token(ctx, q, &op, c, mode); - } - break; - case '<' : - if (end + 2 < q->str_end && end[2] == '=') { - mode = GRN_OP_LESS_EQUAL; - q->cur = end + 3; - } else { - mode = GRN_OP_LESS; - q->cur = end + 2; - } - break; - case '>' : - if (end + 2 < q->str_end && end[2] == '=') { - mode = GRN_OP_GREATER_EQUAL; - q->cur = end + 3; - } else { - mode = GRN_OP_GREATER; - q->cur = end + 2; - } - break; - case '%' : - mode = GRN_OP_MATCH; - q->cur = end + 2; - break; - case '@' : - q->cur = end + 2; - return get_geocond(ctx, q, column, c); - break; - default : - mode = GRN_OP_EQUAL; - q->cur = end + 1; - break; - } - return get_token(ctx, q, &op, c, mode); - } else { - ERR(GRN_INVALID_ARGUMENT, "column lookup failed"); - return ctx->rc; - } - } else if (*end == GRN_QUERY_PREFIX) { - mode = GRN_OP_PREFIX; - q->cur = end + 1; - break; - } - end += len; - } - if (!column) { - ERR(GRN_INVALID_ARGUMENT, "column missing"); - return ctx->rc; - } - if (mode == GRN_OP_ASSIGN) { - grn_expr_append_obj(ctx, q->e, q->v, GRN_OP_PUSH, 1); - grn_expr_append_const(ctx, q->e, column, GRN_OP_PUSH, 1); - grn_expr_append_const_str(ctx, q->e, start, end - start, GRN_OP_PUSH, 1); - grn_expr_append_op(ctx, q->e, GRN_OP_ASSIGN, 2); - } else { - grn_expr_append_obj(ctx, q->e, q->v, GRN_OP_PUSH, 1); - grn_expr_append_const(ctx, q->e, column, GRN_OP_PUSH, 1); - grn_expr_append_op(ctx, q->e, GRN_OP_GET_VALUE, 2); - grn_expr_append_const_str(ctx, q->e, start, end - start, GRN_OP_PUSH, 1); - switch (mode) { - case GRN_OP_NEAR : - case GRN_OP_NEAR2 : - case 
GRN_OP_SIMILAR : - case GRN_OP_TERM_EXTRACT : - grn_expr_append_const_int(ctx, q->e, option, GRN_OP_PUSH, 1); - grn_expr_append_op(ctx, q->e, mode, 3); - break; - default : - grn_expr_append_op(ctx, q->e, mode, 2); - break; - } - } - return GRN_SUCCESS; -} - static grn_bool get_op(efs_info *q, efs_op *op, grn_operator *mode, int *option) { @@ -5756,90 +5502,6 @@ get_op(efs_info *q, efs_op *op, grn_operator *mode, int *option) return found; } -static grn_rc -get_token(grn_ctx *ctx, efs_info *q, efs_op *op, grn_obj *column, grn_operator mode) -{ - int option = 0; - op->op = q->default_op; - op->weight = DEFAULT_WEIGHT; - for (;;) { - skip_space(ctx, q); - if (q->cur >= q->str_end) { return GRN_END_OF_DATA; } - switch (*q->cur) { - case '\0' : - return GRN_END_OF_DATA; - break; - case GRN_QUERY_PARENR : - q->cur++; - return GRN_END_OF_DATA; - break; - case GRN_QUERY_QUOTEL : - q->cur++; - return get_phrase(ctx, q, column, mode, option); - break; - case GRN_QUERY_PREFIX : - q->cur++; - get_op(q, op, &mode, &option); - break; - case GRN_QUERY_AND : - q->cur++; - op->op = GRN_OP_AND; - break; - case GRN_QUERY_AND_NOT : - q->cur++; - op->op = GRN_OP_AND_NOT; - break; - case GRN_QUERY_ADJ_INC : - q->cur++; - if (op->weight < 127) { op->weight++; } - op->op = GRN_OP_ADJUST; - break; - case GRN_QUERY_ADJ_DEC : - q->cur++; - if (op->weight > -128) { op->weight--; } - op->op = GRN_OP_ADJUST; - break; - case GRN_QUERY_ADJ_NEG : - q->cur++; - op->op = GRN_OP_ADJUST; - op->weight = -1; - break; - case GRN_QUERY_PARENL : - q->cur++; - return get_expr(ctx, q, column, mode); - break; - case 'O' : - if (q->cur[1] == 'R' && q->cur[2] == ' ') { - q->cur += 2; - op->op = GRN_OP_OR; - break; - } - /* fallthru */ - default : - return get_word(ctx, q, column, mode, option); - break; - } - } - return GRN_SUCCESS; -} - -static grn_rc -get_expr(grn_ctx *ctx, efs_info *q, grn_obj *column, grn_operator mode) -{ - efs_op op; - grn_rc rc = get_token(ctx, q, &op, column, mode); - if (rc) { return 
rc; } - while (!(rc = get_token(ctx, q, &op, column, mode))) { - if (op.op == GRN_OP_ADJUST) { - grn_expr_append_const_int(ctx, q->e, op.weight, GRN_OP_PUSH, 1); - grn_expr_append_op(ctx, q->e, op.op, 3); - } else { - grn_expr_append_op(ctx, q->e, op.op, 2); - } - } - return rc; -} - #define DISABLE_UNUSED_CODE 1 #ifndef DISABLE_UNUSED_CODE static const char * @@ -6090,6 +5752,10 @@ get_word_(grn_ctx *ctx, efs_info *q) mode = GRN_OP_SUFFIX; q->cur = end + 2; break; + case '~' : + mode = GRN_OP_REGEXP; + q->cur = end + 2; + break; default : mode = GRN_OP_EQUAL; q->cur = end + 1; @@ -6502,6 +6168,10 @@ parse_script(grn_ctx *ctx, efs_info *q) PARSE(GRN_EXPR_TOKEN_SUFFIX); q->cur += 2; break; + case '~' : + PARSE(GRN_EXPR_TOKEN_REGEXP); + q->cur += 2; + break; default : PARSE(GRN_EXPR_TOKEN_MATCH); q->cur++; @@ -7195,3 +6865,42 @@ grn_expr_dump_plan(grn_ctx *ctx, grn_obj *expr, grn_obj *buffer) } GRN_API_RETURN(GRN_SUCCESS); } + +static unsigned int +grn_expr_estimate_size_raw(grn_ctx *ctx, grn_obj *expr, grn_obj *table) +{ + return grn_table_size(ctx, table); +} + +unsigned int +grn_expr_estimate_size(grn_ctx *ctx, grn_obj *expr) +{ + grn_obj *table; + grn_obj *variable; + unsigned int size; + + variable = grn_expr_get_var_by_offset(ctx, expr, 0); + if (!variable) { + ERR(GRN_INVALID_ARGUMENT, "at least one variable must be defined"); + return 0; + } + + table = grn_ctx_at(ctx, variable->header.domain); + if (!table) { + ERR(GRN_INVALID_ARGUMENT, + "variable refers unknown domain: <%u>", variable->header.domain); + return 0; + } + + GRN_API_ENTER; +#ifdef GRN_WITH_MRUBY + if (ctx->impl->mrb.state) { + size = grn_mrb_expr_estimate_size(ctx, expr, table); + } else { + size = grn_expr_estimate_size_raw(ctx, expr, table); + } +#else + size = grn_expr_estimate_size_raw(ctx, expr, table); +#endif + GRN_API_RETURN(size); +} |