diff options
Diffstat (limited to 'storage/mroonga/vendor/groonga/lib/expr.c')
-rw-r--r-- | storage/mroonga/vendor/groonga/lib/expr.c | 2132 |
1 files changed, 940 insertions, 1192 deletions
diff --git a/storage/mroonga/vendor/groonga/lib/expr.c b/storage/mroonga/vendor/groonga/lib/expr.c index dd70ebecb40..f13502a0f14 100644 --- a/storage/mroonga/vendor/groonga/lib/expr.c +++ b/storage/mroonga/vendor/groonga/lib/expr.c @@ -1,6 +1,6 @@ /* -*- c-basic-offset: 2 -*- */ /* - Copyright(C) 2010-2014 Brazil + Copyright(C) 2010-2015 Brazil This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public @@ -15,33 +15,17 @@ License along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ -#include "groonga_in.h" -#include "db.h" -#include "ctx_impl.h" +#include "grn.h" +#include "grn_db.h" +#include "grn_ctx_impl.h" #include <string.h> -#include <float.h> -#include "ii.h" -#include "geo.h" -#include "expr.h" -#include "util.h" -#include "normalizer_in.h" -#include "mrb.h" +#include "grn_ii.h" +#include "grn_geo.h" +#include "grn_expr.h" +#include "grn_util.h" +#include "grn_mrb.h" #include "mrb/mrb_expr.h" -static inline int -function_proc_p(grn_obj *obj) -{ - return (obj && - obj->header.type == GRN_PROC && - ((grn_proc *)obj)->type == GRN_PROC_FUNCTION); -} - -static inline int -selector_proc_p(grn_obj *obj) -{ - return (function_proc_p(obj) && ((grn_proc *)obj)->selector); -} - grn_obj * grn_expr_alloc(grn_ctx *ctx, grn_obj *expr, grn_id domain, grn_obj_flags flags) { @@ -60,7 +44,7 @@ grn_expr_alloc(grn_ctx *ctx, grn_obj *expr, grn_id domain, grn_obj_flags flags) return res; } -static grn_hash * +grn_hash * grn_expr_get_vars(grn_ctx *ctx, grn_obj *expr, unsigned int *nvars) { grn_hash *vars = NULL; @@ -131,6 +115,18 @@ grn_proc_get_info(grn_ctx *ctx, grn_user_data *user_data, } grn_obj * +grn_proc_get_vars(grn_ctx *ctx, grn_user_data *user_data) +{ + uint32_t n; + grn_proc_ctx *pctx = (grn_proc_ctx *)user_data; + if (pctx->proc) { + return (grn_obj *)grn_expr_get_vars(ctx, (grn_obj *)pctx->proc, &n); + } else { + return NULL; + } +} + +grn_obj * grn_proc_get_var(grn_ctx *ctx, grn_user_data *user_data, const char *name, unsigned int name_size) { grn_proc_ctx *pctx = (grn_proc_ctx *)user_data; @@ -170,7 +166,7 @@ grn_rc grn_proc_set_selector(grn_ctx *ctx, grn_obj *proc, grn_selector_func selector) { grn_proc *proc_ = (grn_proc *)proc; - if (!function_proc_p(proc)) { + if (!grn_obj_is_function_proc(ctx, proc)) { return GRN_INVALID_ARGUMENT; } proc_->selector = selector; @@ -179,151 +175,6 @@ grn_proc_set_selector(grn_ctx *ctx, grn_obj *proc, grn_selector_func selector) /* grn_expr */ -static const char *opstrs[] = { - "PUSH", - "POP", - "NOP", - "CALL", - "INTERN", - "GET_REF", - "GET_VALUE", - "AND", - "AND_NOT", - "OR", - "ASSIGN", - "STAR_ASSIGN", - "SLASH_ASSIGN", - "MOD_ASSIGN", - "PLUS_ASSIGN", - "MINUS_ASSIGN", - "SHIFTL_ASSIGN", - "SHIFTR_ASSIGN", - "SHIFTRR_ASSIGN", - "AND_ASSIGN", - "XOR_ASSIGN", - "OR_ASSIGN", - "JUMP", - "CJUMP", - "COMMA", - "BITWISE_OR", - "BITWISE_XOR", - "BITWISE_AND", - "BITWISE_NOT", - "EQUAL", - "NOT_EQUAL", - "LESS", - "GREATER", - "LESS_EQUAL", - "GREATER_EQUAL", - "IN", - "MATCH", - "NEAR", - "NEAR2", - "SIMILAR", - "TERM_EXTRACT", - "SHIFTL", - "SHIFTR", - "SHIFTRR", - "PLUS", - "MINUS", - "STAR", - "SLASH", - "MOD", - "DELETE", - "INCR", - "DECR", - "INCR_POST", - "DECR_POST", - "NOT", - "ADJUST", - "EXACT", - "LCP", - "PARTIAL", - "UNSPLIT", - "PREFIX", - "SUFFIX", - "GEO_DISTANCE1", - "GEO_DISTANCE2", - "GEO_DISTANCE3", - "GEO_DISTANCE4", - "GEO_WITHINP5", - "GEO_WITHINP6", - "GEO_WITHINP8", - "OBJ_SEARCH", - "EXPR_GET_VAR", - "TABLE_CREATE", - "TABLE_SELECT", - "TABLE_SORT", - "TABLE_GROUP", - "JSON_PUT" -}; - -static void -put_value(grn_ctx *ctx, grn_obj *buf, grn_obj *obj) -{ - int len; - char namebuf[GRN_TABLE_MAX_KEY_SIZE]; - if ((len = grn_column_name(ctx, obj, namebuf, GRN_TABLE_MAX_KEY_SIZE))) { - GRN_TEXT_PUT(ctx, buf, namebuf, len); - } else { - grn_text_otoj(ctx, buf, obj, NULL); - } -} - -grn_rc -grn_expr_inspect(grn_ctx *ctx, grn_obj *buf, grn_obj *expr) -{ - uint32_t i, j; - grn_expr_var *var; - grn_expr_code *code; - grn_expr *e = (grn_expr *)expr; - grn_hash *vars = grn_expr_get_vars(ctx, expr, &i); - GRN_TEXT_PUTS(ctx, buf, "noname"); - GRN_TEXT_PUTC(ctx, buf, '('); - { - int i = 0; - grn_obj *value; - const char *name; - uint32_t name_len; - GRN_HASH_EACH(ctx, vars, id, &name, &name_len, &value, { - if (i++) { GRN_TEXT_PUTC(ctx, buf, ','); } - GRN_TEXT_PUT(ctx, buf, name, name_len); - GRN_TEXT_PUTC(ctx, buf, ':'); - put_value(ctx, buf, value); - }); - } - GRN_TEXT_PUTC(ctx, buf, ')'); - GRN_TEXT_PUTC(ctx, buf, '{'); - for (j = 0, code = e->codes; j < e->codes_curr; j++, code++) { - if (j) { GRN_TEXT_PUTC(ctx, buf, ','); } - grn_text_itoa(ctx, buf, code->modify); - if (code->op == GRN_OP_PUSH) { - for (i = 0, var = e->vars; i < e->nvars; i++, var++) { - if (&var->value == code->value) { - GRN_TEXT_PUTC(ctx, buf, '?'); - if (var->name_size) { - GRN_TEXT_PUT(ctx, buf, var->name, var->name_size); - } else { - grn_text_itoa(ctx, buf, (int)i); - } - break; - } - } - if (i == e->nvars) { - put_value(ctx, buf, code->value); - } - } else { - if (code->value) { - put_value(ctx, buf, code->value); - GRN_TEXT_PUTC(ctx, buf, ' '); - } - GRN_TEXT_PUTS(ctx, buf, opstrs[code->op]); - } - } - GRN_TEXT_PUTC(ctx, buf, '}'); - return GRN_SUCCESS; -} - grn_obj * grn_ctx_pop(grn_ctx *ctx) { @@ -504,7 +355,7 @@ grn_expr_open(grn_ctx *ctx, grn_obj_spec *spec, const uint8_t *p, const uint8_t { grn_expr *expr = NULL; if ((expr = GRN_MALLOCN(grn_expr, 1))) { - int size = 256; + int size = GRN_STACK_SIZE; expr->consts = NULL; expr->nconsts = 0; GRN_TEXT_INIT(&expr->name_buf, 0); @@ -869,6 +720,21 @@ grn_expr_get_var_by_offset(grn_ctx *ctx, grn_obj *expr, unsigned int offset) DFI_PUT(e, type, domain, code); \ } while (0) +static void +grn_expr_append_obj_resolve_const(grn_ctx *ctx, + grn_obj *obj, + grn_id to_domain) +{ + grn_obj dest; + + GRN_OBJ_INIT(&dest, GRN_BULK, 0, to_domain); + if (!grn_obj_cast(ctx, obj, &dest, GRN_FALSE)) { + grn_obj_reinit(ctx, obj, to_domain, 0); + grn_bulk_write(ctx, obj, GRN_BULK_HEAD(&dest), GRN_BULK_VSIZE(&dest)); + } + GRN_OBJ_FIN(ctx, &dest); +} + grn_obj * grn_expr_append_obj(grn_ctx *ctx, grn_obj *expr, grn_obj *obj, grn_operator op, int nargs) { @@ -929,7 +795,8 @@ grn_expr_append_obj(grn_ctx *ctx, grn_obj *expr, grn_obj *obj, grn_operator op, ERR(GRN_INVALID_ARGUMENT, "invalid function call expression"); goto exit; } - if (!function_proc_p(proc)) { + if (!(grn_obj_is_function_proc(ctx, proc) || + grn_obj_is_scorer_proc(ctx, proc))) { grn_obj buffer; GRN_TEXT_INIT(&buffer, 0); @@ -1000,26 +867,14 @@ grn_expr_append_obj(grn_ctx *ctx, grn_obj *expr, grn_obj *obj, grn_operator op, if (CONSTP(y)) { /* todo */ } else { - grn_obj dest; if (xd != yd) { - GRN_OBJ_INIT(&dest, GRN_BULK, 0, yd); - if (!grn_obj_cast(ctx, x, &dest, GRN_FALSE)) { - grn_obj_reinit(ctx, x, yd, 0); - grn_bulk_write(ctx, x, GRN_BULK_HEAD(&dest), GRN_BULK_VSIZE(&dest)); - } - GRN_OBJ_FIN(ctx, &dest); + grn_expr_append_obj_resolve_const(ctx, x, yd); } } } else { if (CONSTP(y)) { - grn_obj dest; if (xd != yd) { - GRN_OBJ_INIT(&dest, GRN_BULK, 0, xd); - if (!grn_obj_cast(ctx, y, &dest, GRN_FALSE)) { - grn_obj_reinit(ctx, y, xd, 0); - grn_bulk_write(ctx, y, GRN_BULK_HEAD(&dest), GRN_BULK_VSIZE(&dest)); - } - GRN_OBJ_FIN(ctx, &dest); + grn_expr_append_obj_resolve_const(ctx, y, xd); } } } @@ -1054,6 +909,7 @@ grn_expr_append_obj(grn_ctx *ctx, grn_obj *expr, grn_obj *obj, grn_operator op, case GRN_OP_GET_REF : case GRN_OP_ADJUST : case GRN_OP_TERM_EXTRACT : + case GRN_OP_REGEXP : PUSH_CODE(e, op, obj, nargs, code); if (nargs) { int i = nargs - 1; @@ -1367,425 +1223,6 @@ grn_expr_compile(grn_ctx *ctx, grn_obj *expr) s1 = sp[-2];\ } while (0) -#define DO_COMPARE_SUB_NUMERIC(y,op) do {\ - switch ((y)->header.domain) {\ - case GRN_DB_INT8 :\ - r = (x_ op GRN_INT8_VALUE(y));\ - break;\ - case GRN_DB_UINT8 :\ - r = (x_ op GRN_UINT8_VALUE(y));\ - break;\ - case GRN_DB_INT16 :\ - r = (x_ op GRN_INT16_VALUE(y));\ - break;\ - case GRN_DB_UINT16 :\ - r = (x_ op GRN_UINT16_VALUE(y));\ - break;\ - case GRN_DB_INT32 :\ - r = (x_ op GRN_INT32_VALUE(y));\ - break;\ - case GRN_DB_UINT32 :\ - r = (x_ op GRN_UINT32_VALUE(y));\ - break;\ - case GRN_DB_INT64 :\ - r = (x_ op GRN_INT64_VALUE(y));\ - break;\ - case GRN_DB_TIME :\ - r = (GRN_TIME_PACK(x_,0) op GRN_INT64_VALUE(y));\ - break;\ - case GRN_DB_UINT64 :\ - r = (x_ op GRN_UINT64_VALUE(y));\ - break;\ - case GRN_DB_FLOAT :\ - r = (x_ op GRN_FLOAT_VALUE(y));\ - break;\ - default :\ - r = 0;\ - break;\ - }\ -} while (0) - -#define DO_COMPARE_SUB(op) do {\ - switch (y->header.domain) {\ - case GRN_DB_SHORT_TEXT :\ - case GRN_DB_TEXT :\ - case GRN_DB_LONG_TEXT :\ - {\ - grn_obj y_;\ - GRN_OBJ_INIT(&y_, GRN_BULK, 0, x->header.domain);\ - if (grn_obj_cast(ctx, y, &y_, GRN_FALSE)) {\ - r = 0;\ - } else {\ - DO_COMPARE_SUB_NUMERIC(&y_, op);\ - }\ - GRN_OBJ_FIN(ctx, &y_);\ - }\ - break;\ - default :\ - DO_COMPARE_SUB_NUMERIC(y,op);\ - break;\ - }\ -} while (0) - -#define DO_COMPARE_BUILTIN(x,y,r,op) do {\ - switch (x->header.domain) {\ - case GRN_DB_INT8 :\ - {\ - int8_t x_ = GRN_INT8_VALUE(x);\ - DO_COMPARE_SUB(op);\ - }\ - break;\ - case GRN_DB_UINT8 :\ - {\ - uint8_t x_ = GRN_UINT8_VALUE(x);\ - DO_COMPARE_SUB(op);\ - }\ - break;\ - case GRN_DB_INT16 :\ - {\ - int16_t x_ = GRN_INT16_VALUE(x);\ - DO_COMPARE_SUB(op);\ - }\ - break;\ - case GRN_DB_UINT16 :\ - {\ - uint16_t x_ = GRN_UINT16_VALUE(x);\ - DO_COMPARE_SUB(op);\ - }\ - break;\ - case GRN_DB_INT32 :\ - {\ - int32_t x_ = GRN_INT32_VALUE(x);\ - DO_COMPARE_SUB(op);\ - }\ - break;\ - case GRN_DB_UINT32 :\ - {\ - uint32_t x_ = GRN_UINT32_VALUE(x);\ - DO_COMPARE_SUB(op);\ - }\ - break;\ - case GRN_DB_TIME :\ - {\ - int64_t x_ = GRN_INT64_VALUE(x);\ - switch (y->header.domain) {\ - case GRN_DB_INT32 :\ - r = (x_ op GRN_TIME_PACK(GRN_INT32_VALUE(y), 0));\ - break;\ - case GRN_DB_UINT32 :\ - r = (x_ op GRN_TIME_PACK(GRN_UINT32_VALUE(y), 0));\ - break;\ - case GRN_DB_INT64 :\ - case GRN_DB_TIME :\ - r = (x_ op GRN_INT64_VALUE(y));\ - break;\ - case GRN_DB_UINT64 :\ - r = (x_ op GRN_UINT64_VALUE(y));\ - break;\ - case GRN_DB_FLOAT :\ - r = (x_ op GRN_TIME_PACK(GRN_FLOAT_VALUE(y), 0));\ - break;\ - case GRN_DB_SHORT_TEXT :\ - case GRN_DB_TEXT :\ - case GRN_DB_LONG_TEXT :\ - {\ - const char *p_ = GRN_TEXT_VALUE(y);\ - int i_ = grn_atoi(p_, p_ + GRN_TEXT_LEN(y), NULL);\ - r = (x_ op GRN_TIME_PACK(i_, 0));\ - }\ - break;\ - default :\ - r = 0;\ - break;\ - }\ - }\ - break;\ - case GRN_DB_INT64 :\ - {\ - int64_t x_ = GRN_INT64_VALUE(x);\ - DO_COMPARE_SUB(op);\ - }\ - break;\ - case GRN_DB_UINT64 :\ - {\ - uint64_t x_ = GRN_UINT64_VALUE(x);\ - DO_COMPARE_SUB(op);\ - }\ - break;\ - case GRN_DB_FLOAT :\ - {\ - double x_ = GRN_FLOAT_VALUE(x);\ - DO_COMPARE_SUB(op);\ - }\ - break;\ - case GRN_DB_SHORT_TEXT :\ - case GRN_DB_TEXT :\ - case GRN_DB_LONG_TEXT :\ - if (GRN_DB_SHORT_TEXT <= y->header.domain && y->header.domain <= GRN_DB_LONG_TEXT) {\ - int r_;\ - uint32_t la = GRN_TEXT_LEN(x), lb = GRN_TEXT_LEN(y);\ - if (la > lb) {\ - if (!(r_ = memcmp(GRN_TEXT_VALUE(x), GRN_TEXT_VALUE(y), lb))) {\ - r_ = 1;\ - }\ - } else {\ - if (!(r_ = memcmp(GRN_TEXT_VALUE(x), GRN_TEXT_VALUE(y), la))) {\ - r_ = la == lb ? 0 : -1;\ - }\ - }\ - r = (r_ op 0);\ - } else {\ - const char *q_ = GRN_TEXT_VALUE(x);\ - int x_ = grn_atoi(q_, q_ + GRN_TEXT_LEN(x), NULL);\ - DO_COMPARE_SUB(op);\ - }\ - break;\ - default :\ - r = 0;\ - break;\ - }\ -} while (0) - -#define DO_COMPARE(x, y, r, op) do {\ - if (x->header.domain >= GRN_N_RESERVED_TYPES) {\ - grn_obj *table;\ - table = grn_ctx_at(ctx, x->header.domain);\ - switch (table->header.type) {\ - case GRN_TABLE_HASH_KEY :\ - case GRN_TABLE_PAT_KEY :\ - {\ - grn_obj key;\ - int length;\ - GRN_OBJ_INIT(&key, GRN_BULK, 0, table->header.domain);\ - length = grn_table_get_key2(ctx, table, GRN_RECORD_VALUE(x), &key);\ - if (length > 0) {\ - grn_obj *x_original = x;\ - x = &key;\ - DO_COMPARE_BUILTIN((&key), y, r, op);\ - x = x_original;\ - } else {\ - r = 0;\ - }\ - GRN_OBJ_FIN(ctx, &key);\ - }\ - break;\ - default :\ - r = 0;\ - break;\ - }\ - grn_obj_unlink(ctx, table);\ - } else {\ - DO_COMPARE_BUILTIN(x, y, r, op);\ - }\ -} while (0) - -#define DO_EQ_SUB do {\ - switch (y->header.domain) {\ - case GRN_DB_INT8 :\ - r = (x_ == GRN_INT8_VALUE(y));\ - break;\ - case GRN_DB_UINT8 :\ - r = (x_ == GRN_UINT8_VALUE(y));\ - break;\ - case GRN_DB_INT16 :\ - r = (x_ == GRN_INT16_VALUE(y));\ - break;\ - case GRN_DB_UINT16 :\ - r = (x_ == GRN_UINT16_VALUE(y));\ - break;\ - case GRN_DB_INT32 :\ - r = (x_ == GRN_INT32_VALUE(y));\ - break;\ - case GRN_DB_UINT32 :\ - r = (x_ == GRN_UINT32_VALUE(y));\ - break;\ - case GRN_DB_INT64 :\ - r = (x_ == GRN_INT64_VALUE(y));\ - break;\ - case GRN_DB_TIME :\ - r = (GRN_TIME_PACK(x_,0) == GRN_INT64_VALUE(y));\ - break;\ - case GRN_DB_UINT64 :\ - r = (x_ == GRN_UINT64_VALUE(y));\ - break;\ - case GRN_DB_FLOAT :\ - r = ((x_ <= GRN_FLOAT_VALUE(y)) && (x_ >= GRN_FLOAT_VALUE(y)));\ - break;\ - case GRN_DB_SHORT_TEXT :\ - case GRN_DB_TEXT :\ - case GRN_DB_LONG_TEXT :\ - {\ - const char *p_ = GRN_TEXT_VALUE(y);\ - int i_ = grn_atoi(p_, p_ + GRN_TEXT_LEN(y), NULL);\ - r = (x_ == i_);\ - }\ - break;\ - default :\ - r = 0;\ - break;\ - }\ -} while (0) - -#define DO_EQ(x,y,r) do {\ - switch (x->header.domain) {\ - case GRN_DB_VOID :\ - r = 0;\ - break;\ - case GRN_DB_INT8 :\ - {\ - int8_t x_ = GRN_INT8_VALUE(x);\ - DO_EQ_SUB;\ - }\ - break;\ - case GRN_DB_UINT8 :\ - {\ - uint8_t x_ = GRN_UINT8_VALUE(x);\ - DO_EQ_SUB;\ - }\ - break;\ - case GRN_DB_INT16 :\ - {\ - int16_t x_ = GRN_INT16_VALUE(x);\ - DO_EQ_SUB;\ - }\ - break;\ - case GRN_DB_UINT16 :\ - {\ - uint16_t x_ = GRN_UINT16_VALUE(x);\ - DO_EQ_SUB;\ - }\ - break;\ - case GRN_DB_INT32 :\ - {\ - int32_t x_ = GRN_INT32_VALUE(x);\ - DO_EQ_SUB;\ - }\ - break;\ - case GRN_DB_UINT32 :\ - {\ - uint32_t x_ = GRN_UINT32_VALUE(x);\ - DO_EQ_SUB;\ - }\ - break;\ - case GRN_DB_INT64 :\ - {\ - int64_t x_ = GRN_INT64_VALUE(x);\ - DO_EQ_SUB;\ - }\ - break;\ - case GRN_DB_TIME :\ - {\ - int64_t x_ = GRN_INT64_VALUE(x);\ - switch (y->header.domain) {\ - case GRN_DB_INT32 :\ - r = (x_ == GRN_TIME_PACK(GRN_INT32_VALUE(y), 0));\ - break;\ - case GRN_DB_UINT32 :\ - r = (x_ == GRN_TIME_PACK(GRN_UINT32_VALUE(y), 0));\ - break;\ - case GRN_DB_INT64 :\ - case GRN_DB_TIME :\ - r = (x_ == GRN_INT64_VALUE(y));\ - break;\ - case GRN_DB_UINT64 :\ - r = (x_ == GRN_UINT64_VALUE(y));\ - break;\ - case GRN_DB_FLOAT :\ - r = (x_ == GRN_TIME_PACK(GRN_FLOAT_VALUE(y), 0));\ - break;\ - case GRN_DB_SHORT_TEXT :\ - case GRN_DB_TEXT :\ - case GRN_DB_LONG_TEXT :\ - {\ - const char *p_ = GRN_TEXT_VALUE(y);\ - int i_ = grn_atoi(p_, p_ + GRN_TEXT_LEN(y), NULL);\ - r = (x_ == GRN_TIME_PACK(i_, 0));\ - }\ - break;\ - default :\ - r = 0;\ - break;\ - }\ - }\ - break;\ - case GRN_DB_UINT64 :\ - {\ - uint64_t x_ = GRN_UINT64_VALUE(x);\ - DO_EQ_SUB;\ - }\ - break;\ - case GRN_DB_FLOAT :\ - {\ - double x_ = GRN_FLOAT_VALUE(x);\ - switch (y->header.domain) {\ - case GRN_DB_INT32 :\ - r = ((x_ <= GRN_INT32_VALUE(y)) && (x_ >= GRN_INT32_VALUE(y)));\ - break;\ - case GRN_DB_UINT32 :\ - r = ((x_ <= GRN_UINT32_VALUE(y)) && (x_ >= GRN_UINT32_VALUE(y)));\ - break;\ - case GRN_DB_INT64 :\ - case GRN_DB_TIME :\ - r = ((x_ <= GRN_INT64_VALUE(y)) && (x_ >= GRN_INT64_VALUE(y)));\ - break;\ - case GRN_DB_UINT64 :\ - r = ((x_ <= GRN_UINT64_VALUE(y)) && (x_ >= GRN_UINT64_VALUE(y)));\ - break;\ - case GRN_DB_FLOAT :\ - r = ((x_ <= GRN_FLOAT_VALUE(y)) && (x_ >= GRN_FLOAT_VALUE(y)));\ - break;\ - case GRN_DB_SHORT_TEXT :\ - case GRN_DB_TEXT :\ - case GRN_DB_LONG_TEXT :\ - {\ - const char *p_ = GRN_TEXT_VALUE(y);\ - int i_ = grn_atoi(p_, p_ + GRN_TEXT_LEN(y), NULL);\ - r = (x_ <= i_ && x_ >= i_);\ - }\ - break;\ - default :\ - r = 0;\ - break;\ - }\ - }\ - break;\ - case GRN_DB_SHORT_TEXT :\ - case GRN_DB_TEXT :\ - case GRN_DB_LONG_TEXT :\ - if (GRN_DB_SHORT_TEXT <= y->header.domain && y->header.domain <= GRN_DB_LONG_TEXT) {\ - uint32_t la = GRN_TEXT_LEN(x), lb = GRN_TEXT_LEN(y);\ - r = (la == lb && !memcmp(GRN_TEXT_VALUE(x), GRN_TEXT_VALUE(y), lb));\ - } else {\ - const char *q_ = GRN_TEXT_VALUE(x);\ - int x_ = grn_atoi(q_, q_ + GRN_TEXT_LEN(x), NULL);\ - DO_EQ_SUB;\ - }\ - break;\ - default :\ - if ((x->header.domain == y->header.domain)) {\ - r = (GRN_BULK_VSIZE(x) == GRN_BULK_VSIZE(y) &&\ - !(memcmp(GRN_BULK_HEAD(x), GRN_BULK_HEAD(y), GRN_BULK_VSIZE(x))));\ - } else {\ - grn_obj dest;\ - if (x->header.domain < y->header.domain) {\ - GRN_OBJ_INIT(&dest, GRN_BULK, 0, y->header.domain);\ - if (!grn_obj_cast(ctx, x, &dest, GRN_FALSE)) {\ - r = (GRN_BULK_VSIZE(&dest) == GRN_BULK_VSIZE(y) &&\ - !memcmp(GRN_BULK_HEAD(&dest), GRN_BULK_HEAD(y), GRN_BULK_VSIZE(y))); \ - }\ - } else {\ - GRN_OBJ_INIT(&dest, GRN_BULK, 0, x->header.domain);\ - if (!grn_obj_cast(ctx, y, &dest, GRN_FALSE)) {\ - r = (GRN_BULK_VSIZE(&dest) == GRN_BULK_VSIZE(x) &&\ - !memcmp(GRN_BULK_HEAD(&dest), GRN_BULK_HEAD(x), GRN_BULK_VSIZE(x))); \ - }\ - }\ - GRN_OBJ_FIN(ctx, &dest);\ - }\ - break;\ - }\ -} while (0) - #define GEO_RESOLUTION 3600000 #define GEO_RADIOUS 6357303 #define GEO_BES_C1 6334834 @@ -2230,7 +1667,8 @@ grn_proc_call(grn_ctx *ctx, grn_obj *proc, int nargs, grn_obj *caller) code++; \ } while (0) -#define ARITHMETIC_BINARY_OPERATION_DISPATCH(integer8_operation, \ +#define ARITHMETIC_BINARY_OPERATION_DISPATCH(operator, \ + integer8_operation, \ integer16_operation, \ integer32_operation, \ integer64_operation, \ @@ -2242,6 +1680,23 @@ grn_proc_call(grn_ctx *ctx, grn_obj *proc, int nargs, grn_obj *caller) grn_obj *x, *y; \ \ POP2ALLOC1(x, y, res); \ + if (x->header.type == GRN_VECTOR || y->header.type == GRN_VECTOR) { \ + grn_obj inspected_x; \ + grn_obj inspected_y; \ + GRN_TEXT_INIT(&inspected_x, 0); \ + GRN_TEXT_INIT(&inspected_y, 0); \ + grn_inspect(ctx, &inspected_x, x); \ + grn_inspect(ctx, &inspected_y, y); \ + ERR(GRN_INVALID_ARGUMENT, \ + "<%s> doesn't support vector: <%.*s> %s <%.*s>", \ + operator, \ + (int)GRN_TEXT_LEN(&inspected_x), GRN_TEXT_VALUE(&inspected_x), \ + operator, \ + (int)GRN_TEXT_LEN(&inspected_y), GRN_TEXT_VALUE(&inspected_y)); \ + GRN_OBJ_FIN(ctx, &inspected_x); \ + GRN_OBJ_FIN(ctx, &inspected_y); \ + goto exit; \ + } \ if (y != res) { \ res->header.domain = x->header.domain; \ } \ @@ -2667,7 +2122,9 @@ grn_proc_call(grn_ctx *ctx, grn_obj *proc, int nargs, grn_obj *caller) } \ if (GRN_BULK_VSIZE(var) != (sizeof(grn_obj *) + sizeof(grn_id))) { \ ERR(GRN_INVALID_ARGUMENT, \ - "invalid variable size: expected: %zu, actual: %zu", \ + "invalid variable size: " \ + "expected: %" GRN_FMT_SIZE \ + "actual: %" GRN_FMT_SIZE, \ (sizeof(grn_obj *) + sizeof(grn_id)), GRN_BULK_VSIZE(var)); \ goto exit; \ } \ @@ -2768,52 +2225,6 @@ grn_proc_call(grn_ctx *ctx, grn_obj *proc, int nargs, grn_obj *caller) } \ } while (0) -static grn_bool -pseudo_query_scan(grn_ctx *ctx, grn_obj *x, grn_obj *y) -{ - grn_obj *normalizer; - grn_obj *a = NULL, *b = NULL; - grn_bool matched = GRN_FALSE; - - normalizer = grn_ctx_get(ctx, GRN_NORMALIZER_AUTO_NAME, -1); - switch (x->header.domain) { - case GRN_DB_SHORT_TEXT: - case GRN_DB_TEXT: - case GRN_DB_LONG_TEXT: - a = grn_string_open(ctx, GRN_TEXT_VALUE(x), GRN_TEXT_LEN(x), - normalizer, 0); - break; - default: - break; - } - - switch (y->header.domain) { - case GRN_DB_SHORT_TEXT: - case GRN_DB_TEXT: - case GRN_DB_LONG_TEXT: - b = grn_string_open(ctx, GRN_TEXT_VALUE(y), GRN_TEXT_LEN(y), - normalizer, 0); - break; - default: - break; - } - - /* normalized str doesn't contain '\0'. */ - if (a && b) { - const char *a_norm, *b_norm; - grn_string_get_normalized(ctx, a, &a_norm, NULL, NULL); - grn_string_get_normalized(ctx, b, &b_norm, NULL, NULL); - matched = (strstr(a_norm, b_norm) != NULL); - } - - if (a) { grn_obj_close(ctx, a); } - if (b) { grn_obj_close(ctx, b); } - - if (normalizer) { grn_obj_unlink(ctx, normalizer); } - - return matched; -} - inline static void grn_expr_exec_get_member(grn_ctx *ctx, grn_obj *expr, @@ -2831,10 +2242,10 @@ grn_expr_exec_get_member(grn_ctx *ctx, GRN_TEXT_INIT(&values, 0); grn_obj_get_value(ctx, column, record_id, &values); - grn_obj_reinit(ctx, result, DB_OBJ(column)->range, 0); i = GRN_UINT32_VALUE(index); if (values.header.type == GRN_UVECTOR) { int n_elements; + grn_obj_reinit(ctx, result, DB_OBJ(column)->range, 0); n_elements = GRN_BULK_VSIZE(&values) / sizeof(grn_id); if (n_elements > i) { grn_id value; @@ -2843,25 +2254,70 @@ grn_expr_exec_get_member(grn_ctx *ctx, } } else { if (values.u.v.n_sections > i) { - grn_section *section = &(values.u.v.sections[i]); - grn_obj *body = values.u.v.body; - const char *value; - value = GRN_BULK_HEAD(body) + section->offset; - grn_bulk_write(ctx, result, value, section->length); + const char *content; + unsigned int content_length; + grn_id domain; + + content_length = grn_vector_get_element(ctx, &values, i, + &content, NULL, &domain); + grn_obj_reinit(ctx, result, domain, 0); + grn_bulk_write(ctx, result, content, content_length); } } GRN_OBJ_FIN(ctx, &values); } +static inline grn_bool +grn_expr_exec_is_simple_expr(grn_ctx *ctx, grn_obj *expr) +{ + grn_expr *e = (grn_expr *)expr; + + if (expr->header.type != GRN_EXPR) { + return GRN_FALSE; + } + + if (e->codes_curr != 1) { + return GRN_FALSE; + } + + switch (e->codes[0].op) { + case GRN_OP_PUSH : + return GRN_TRUE; + default : + return GRN_FALSE; + } +} + +static inline grn_obj * +grn_expr_exec_simple(grn_ctx *ctx, grn_obj *expr) +{ + grn_expr *e = (grn_expr *)expr; + + return e->codes[0].value; +} + grn_obj * grn_expr_exec(grn_ctx *ctx, grn_obj *expr, int nargs) { grn_obj *val = NULL; uint32_t stack_curr = ctx->impl->stack_curr; GRN_API_ENTER; + if (grn_expr_exec_is_simple_expr(ctx, expr)) { + val = grn_expr_exec_simple(ctx, expr); + GRN_API_RETURN(val); + } if (expr->header.type == GRN_PROC) { - grn_proc_call(ctx, expr, nargs, expr); + grn_proc *proc = (grn_proc *)expr; + if (proc->type == GRN_PROC_COMMAND) { + grn_command_input *input; + input = grn_command_input_open(ctx, expr); + grn_command_run(ctx, expr, input); + grn_command_input_close(ctx, input); + GRN_API_RETURN(NULL); + } else { + grn_proc_call(ctx, expr, nargs, expr); + } } else { grn_expr *e = (grn_expr *)expr; register grn_obj **s_ = ctx->impl->stack, *s0 = NULL, *s1 = NULL, **sp, *vp = e->values; @@ -3218,12 +2674,7 @@ grn_expr_exec(grn_ctx *ctx, grn_obj *expr, int nargs) case GRN_OP_GET_VALUE : { grn_obj *col, *rec; - grn_obj pat_value; - GRN_TEXT_INIT(&pat_value, 0); do { - uint32_t size; - const char *value; - grn_bool is_pat_key_accessor = GRN_FALSE; if (code->nargs == 1) { rec = v0; if (code->value) { @@ -3245,39 +2696,14 @@ grn_expr_exec(grn_ctx *ctx, grn_obj *expr, int nargs) col = grn_obj_column(ctx, table, GRN_BULK_HEAD(col), GRN_BULK_VSIZE(col)); if (col) { grn_expr_take_obj(ctx, (grn_obj *)expr, col); } } - if (col) { - grn_obj_reinit_for(ctx, res, col); - if (col->header.type == GRN_ACCESSOR && - ((grn_accessor *)col)->action == GRN_ACCESSOR_GET_KEY && - ((grn_accessor *)col)->obj->header.type == GRN_TABLE_PAT_KEY) { - is_pat_key_accessor = GRN_TRUE; - GRN_BULK_REWIND(&pat_value); - grn_obj_get_value(ctx, col, GRN_RECORD_VALUE(rec), &pat_value); - value = GRN_BULK_HEAD(&pat_value); - size = GRN_BULK_VSIZE(&pat_value); - } else { - value = grn_obj_get_value_(ctx, col, GRN_RECORD_VALUE(rec), - &size); - } - } else { + if (!col) { ERR(GRN_INVALID_ARGUMENT, "col resolve failed"); - GRN_OBJ_FIN(ctx, &pat_value); goto exit; } - if (!is_pat_key_accessor && size == GRN_OBJ_GET_VALUE_IMD) { - GRN_RECORD_SET(ctx, res, (uintptr_t)value); - } else { - if (value) { - if (res->header.type == GRN_VECTOR) { - grn_vector_decode(ctx, res, value, size); - } else { - grn_bulk_write_from(ctx, res, value, 0, size); - } - } - } + grn_obj_reinit_for(ctx, res, col); + grn_obj_get_value(ctx, col, GRN_RECORD_VALUE(rec), res); code++; } while (code < ce && code->op == GRN_OP_GET_VALUE); - GRN_OBJ_FIN(ctx, &pat_value); } break; case GRN_OP_OBJ_SEARCH : @@ -3511,7 +2937,7 @@ grn_expr_exec(grn_ctx *ctx, grn_obj *expr, int nargs) POP1(y); POP1(x); WITH_SPSAVE({ - matched = pseudo_query_scan(ctx, x, y); + matched = grn_operator_exec_match(ctx, x, y); }); ALLOC1(res); grn_obj_reinit(ctx, res, GRN_DB_INT32, 0); @@ -3521,35 +2947,38 @@ grn_expr_exec(grn_ctx *ctx, grn_obj *expr, int nargs) break; case GRN_OP_EQUAL : { - int r = GRN_FALSE; + grn_bool is_equal; grn_obj *x, *y; POP2ALLOC1(x, y, res); - DO_EQ(x, y, r); + is_equal = grn_operator_exec_equal(ctx, x, y); grn_obj_reinit(ctx, res, GRN_DB_INT32, 0); - GRN_INT32_SET(ctx, res, r); + GRN_INT32_SET(ctx, res, is_equal ? 1 : 0); } code++; break; case GRN_OP_NOT_EQUAL : { - int r = GRN_FALSE; + grn_bool is_not_equal; grn_obj *x, *y; POP2ALLOC1(x, y, res); - DO_EQ(x, y, r); + is_not_equal = grn_operator_exec_not_equal(ctx, x, y); grn_obj_reinit(ctx, res, GRN_DB_INT32, 0); - GRN_INT32_SET(ctx, res, 1 - r); + GRN_INT32_SET(ctx, res, is_not_equal ? 1 : 0); } code++; break; case GRN_OP_PREFIX : { grn_obj *x, *y; - POP2ALLOC1(x, y, res); - GRN_INT32_SET(ctx, res, - (GRN_TEXT_LEN(x) >= GRN_TEXT_LEN(y) && - !memcmp(GRN_TEXT_VALUE(x), GRN_TEXT_VALUE(y), GRN_TEXT_LEN(y)))); - res->header.type = GRN_BULK; - res->header.domain = GRN_DB_INT32; + grn_bool matched; + POP1(y); + POP1(x); + WITH_SPSAVE({ + matched = grn_operator_exec_prefix(ctx, x, y); + }); + ALLOC1(res); + grn_obj_reinit(ctx, res, GRN_DB_INT32, 0); + GRN_INT32_SET(ctx, res, matched ? 1 : 0); } code++; break; @@ -3568,11 +2997,11 @@ grn_expr_exec(grn_ctx *ctx, grn_obj *expr, int nargs) break; case GRN_OP_LESS : { - int r; + grn_bool r; grn_obj *x, *y; POP2ALLOC1(x, y, res); - DO_COMPARE(x, y, r, <); - GRN_INT32_SET(ctx, res, r); + r = grn_operator_exec_less(ctx, x, y); + GRN_INT32_SET(ctx, res, r ? 1 : 0); res->header.type = GRN_BULK; res->header.domain = GRN_DB_INT32; } @@ -3580,11 +3009,11 @@ grn_expr_exec(grn_ctx *ctx, grn_obj *expr, int nargs) break; case GRN_OP_GREATER : { - int r; + grn_bool r; grn_obj *x, *y; POP2ALLOC1(x, y, res); - DO_COMPARE(x, y, r, >); - GRN_INT32_SET(ctx, res, r); + r = grn_operator_exec_greater(ctx, x, y); + GRN_INT32_SET(ctx, res, r ? 1 : 0); res->header.type = GRN_BULK; res->header.domain = GRN_DB_INT32; } @@ -3592,11 +3021,11 @@ grn_expr_exec(grn_ctx *ctx, grn_obj *expr, int nargs) break; case GRN_OP_LESS_EQUAL : { - int r; + grn_bool r; grn_obj *x, *y; POP2ALLOC1(x, y, res); - DO_COMPARE(x, y, r, <=); - GRN_INT32_SET(ctx, res, r); + r = grn_operator_exec_less_equal(ctx, x, y); + GRN_INT32_SET(ctx, res, r ? 1 : 0); res->header.type = GRN_BULK; res->header.domain = GRN_DB_INT32; } @@ -3604,11 +3033,11 @@ grn_expr_exec(grn_ctx *ctx, grn_obj *expr, int nargs) break; case GRN_OP_GREATER_EQUAL : { - int r; + grn_bool r; grn_obj *x, *y; POP2ALLOC1(x, y, res); - DO_COMPARE(x, y, r, >=); - GRN_INT32_SET(ctx, res, r); + r = grn_operator_exec_greater_equal(ctx, x, y); + GRN_INT32_SET(ctx, res, r ? 1 : 0); res->header.type = GRN_BULK; res->header.domain = GRN_DB_INT32; } @@ -3799,6 +3228,7 @@ grn_expr_exec(grn_ctx *ctx, grn_obj *expr, int nargs) break; case GRN_OP_PLUS : ARITHMETIC_BINARY_OPERATION_DISPATCH( + "+", INTEGER_ARITHMETIC_OPERATION_PLUS, INTEGER_ARITHMETIC_OPERATION_PLUS, INTEGER_ARITHMETIC_OPERATION_PLUS, @@ -3847,6 +3277,7 @@ grn_expr_exec(grn_ctx *ctx, grn_obj *expr, int nargs) ,); } else { ARITHMETIC_BINARY_OPERATION_DISPATCH( + "-", INTEGER_ARITHMETIC_OPERATION_MINUS, INTEGER_ARITHMETIC_OPERATION_MINUS, INTEGER_ARITHMETIC_OPERATION_MINUS, @@ -3865,6 +3296,7 @@ grn_expr_exec(grn_ctx *ctx, grn_obj *expr, int nargs) break; case GRN_OP_STAR : ARITHMETIC_BINARY_OPERATION_DISPATCH( + "*", INTEGER_ARITHMETIC_OPERATION_STAR, INTEGER_ARITHMETIC_OPERATION_STAR, INTEGER_ARITHMETIC_OPERATION_STAR, @@ -3914,6 +3346,7 @@ grn_expr_exec(grn_ctx *ctx, grn_obj *expr, int nargs) break; case GRN_OP_BITWISE_OR : ARITHMETIC_BINARY_OPERATION_DISPATCH( + "|", INTEGER_ARITHMETIC_OPERATION_BITWISE_OR, INTEGER_ARITHMETIC_OPERATION_BITWISE_OR, INTEGER_ARITHMETIC_OPERATION_BITWISE_OR, @@ -3925,6 +3358,7 @@ grn_expr_exec(grn_ctx *ctx, grn_obj *expr, int nargs) break; case GRN_OP_BITWISE_XOR : ARITHMETIC_BINARY_OPERATION_DISPATCH( + "^", INTEGER_ARITHMETIC_OPERATION_BITWISE_XOR, INTEGER_ARITHMETIC_OPERATION_BITWISE_XOR, INTEGER_ARITHMETIC_OPERATION_BITWISE_XOR, @@ -3936,6 +3370,7 @@ grn_expr_exec(grn_ctx *ctx, grn_obj *expr, int nargs) break; case GRN_OP_BITWISE_AND : ARITHMETIC_BINARY_OPERATION_DISPATCH( + "&", INTEGER_ARITHMETIC_OPERATION_BITWISE_AND, INTEGER_ARITHMETIC_OPERATION_BITWISE_AND, INTEGER_ARITHMETIC_OPERATION_BITWISE_AND, @@ -3947,6 +3382,7 @@ grn_expr_exec(grn_ctx *ctx, grn_obj *expr, int nargs) break; case GRN_OP_SHIFTL : ARITHMETIC_BINARY_OPERATION_DISPATCH( + "<<", INTEGER_ARITHMETIC_OPERATION_SHIFTL, INTEGER_ARITHMETIC_OPERATION_SHIFTL, INTEGER_ARITHMETIC_OPERATION_SHIFTL, @@ -3958,6 +3394,7 @@ grn_expr_exec(grn_ctx *ctx, grn_obj *expr, int nargs) break; case GRN_OP_SHIFTR : ARITHMETIC_BINARY_OPERATION_DISPATCH( + ">>", INTEGER_ARITHMETIC_OPERATION_SHIFTR, INTEGER_ARITHMETIC_OPERATION_SHIFTR, INTEGER_ARITHMETIC_OPERATION_SHIFTR, @@ -3969,6 +3406,7 @@ grn_expr_exec(grn_ctx *ctx, grn_obj *expr, int nargs) break; case GRN_OP_SHIFTRR : ARITHMETIC_BINARY_OPERATION_DISPATCH( + ">>>", INTEGER8_ARITHMETIC_OPERATION_SHIFTRR, INTEGER16_ARITHMETIC_OPERATION_SHIFTRR, INTEGER32_ARITHMETIC_OPERATION_SHIFTRR, @@ -4010,10 +3448,11 @@ grn_expr_exec(grn_ctx *ctx, grn_obj *expr, int nargs) case GRN_OP_NOT : { grn_obj *value; + grn_bool value_boolean; POP1ALLOC1(value, res); + GRN_TRUEP(ctx, value, value_boolean); grn_obj_reinit(ctx, res, GRN_DB_BOOL, 0); - grn_obj_cast(ctx, value, res, GRN_FALSE); - GRN_BOOL_SET(ctx, res, !GRN_BOOL_VALUE(res)); + GRN_BOOL_SET(ctx, res, !value_boolean); } code++; break; @@ -4025,6 +3464,21 @@ grn_expr_exec(grn_ctx *ctx, grn_obj *expr, int nargs) code++; } break; + case GRN_OP_REGEXP : + { + grn_obj *target, *pattern; + grn_bool matched; + POP1(pattern); + POP1(target); + WITH_SPSAVE({ + matched = grn_operator_exec_regexp(ctx, target, pattern); + }); + ALLOC1(res); + grn_obj_reinit(ctx, res, GRN_DB_BOOL, 0); + GRN_BOOL_SET(ctx, res, matched); + } + code++; + break; default : ERR(GRN_FUNCTION_NOT_IMPLEMENTED, "not implemented operator assigned"); goto exit; @@ -4066,6 +3520,8 @@ grn_expr_get_value(grn_ctx *ctx, grn_obj *expr, int offset) #define DEFAULT_TERM_EXTRACT_POLICY 0 #define DEFAULT_WEIGHT_VECTOR_SIZE 4096 +#define GRN_SCAN_INFO_MAX_N_ARGS 128 + struct _grn_scan_info { uint32_t start; uint32_t end; @@ -4076,14 +3532,20 @@ struct _grn_scan_info { grn_obj wv; grn_obj index; grn_obj *query; - grn_obj *args[8]; + grn_obj *args[GRN_SCAN_INFO_MAX_N_ARGS]; int max_interval; int similarity_threshold; + grn_obj scorers; + grn_obj scorer_args_exprs; + grn_obj scorer_args_expr_offsets; }; #define SI_FREE(si) do {\ GRN_OBJ_FIN(ctx, &(si)->wv);\ GRN_OBJ_FIN(ctx, &(si)->index);\ + GRN_OBJ_FIN(ctx, &(si)->scorers);\ + GRN_OBJ_FIN(ctx, &(si)->scorer_args_exprs);\ + GRN_OBJ_FIN(ctx, &(si)->scorer_args_expr_offsets);\ GRN_FREE(si);\ } while (0) @@ -4102,6 +3564,9 @@ struct _grn_scan_info { (si)->max_interval = DEFAULT_MAX_INTERVAL;\ (si)->similarity_threshold = DEFAULT_SIMILARITY_THRESHOLD;\ (si)->start = (st);\ + GRN_PTR_INIT(&(si)->scorers, GRN_OBJ_VECTOR, GRN_ID_NIL);\ + GRN_PTR_INIT(&(si)->scorer_args_exprs, GRN_OBJ_VECTOR, GRN_ID_NIL);\ + GRN_UINT32_INIT(&(si)->scorer_args_expr_offsets, GRN_OBJ_VECTOR);\ } while (0) static scan_info ** @@ -4145,9 +3610,9 @@ put_logical_op(grn_ctx *ctx, scan_info **sis, int *ip, grn_operator op, int star } else { s_->flags &= ~SCAN_PUSH; s_->logical_op = op; - memcpy(&sis[i], &sis[j], sizeof(scan_info *) * (r - j)); - memmove(&sis[j], &sis[r], sizeof(scan_info *) * (i - r)); - memcpy(&sis[i + j - r], &sis[i], sizeof(scan_info *) * (r - j)); + grn_memcpy(&sis[i], &sis[j], sizeof(scan_info *) * (r - j)); + grn_memmove(&sis[j], &sis[r], sizeof(scan_info *) * (i - r)); + grn_memcpy(&sis[i + j - r], &sis[i], sizeof(scan_info *) * (r - j)); } break; } @@ -4168,22 +3633,177 @@ put_logical_op(grn_ctx *ctx, scan_info **sis, int *ip, grn_operator op, int star return sis; } +/* TODO: Remove me if nobody doesn't want to reuse the implementation again. */ +#if 0 +static const char *opstrs[] = { + "PUSH", + "POP", + "NOP", + "CALL", + "INTERN", + "GET_REF", + "GET_VALUE", + "AND", + "AND_NOT", + "OR", + "ASSIGN", + "STAR_ASSIGN", + "SLASH_ASSIGN", + "MOD_ASSIGN", + "PLUS_ASSIGN", + "MINUS_ASSIGN", + "SHIFTL_ASSIGN", + "SHIFTR_ASSIGN", + "SHIFTRR_ASSIGN", + "AND_ASSIGN", + "XOR_ASSIGN", + "OR_ASSIGN", + "JUMP", + "CJUMP", + "COMMA", + "BITWISE_OR", + "BITWISE_XOR", + "BITWISE_AND", + "BITWISE_NOT", + "EQUAL", + "NOT_EQUAL", + "LESS", + "GREATER", + "LESS_EQUAL", + "GREATER_EQUAL", + "IN", + "MATCH", + "NEAR", + "NEAR2", + "SIMILAR", + "TERM_EXTRACT", + "SHIFTL", + "SHIFTR", + "SHIFTRR", + "PLUS", + "MINUS", + "STAR", + "SLASH", + "MOD", + "DELETE", + "INCR", + "DECR", + "INCR_POST", + "DECR_POST", + "NOT", + "ADJUST", + "EXACT", + "LCP", + "PARTIAL", + "UNSPLIT", + "PREFIX", + "SUFFIX", + "GEO_DISTANCE1", + "GEO_DISTANCE2", + "GEO_DISTANCE3", + "GEO_DISTANCE4", + "GEO_WITHINP5", + "GEO_WITHINP6", + "GEO_WITHINP8", + "OBJ_SEARCH", + "EXPR_GET_VAR", + "TABLE_CREATE", + "TABLE_SELECT", + "TABLE_SORT", + "TABLE_GROUP", + "JSON_PUT" +}; + +static void +put_value(grn_ctx *ctx, grn_obj *buf, grn_obj *obj) +{ + int len; + char namebuf[GRN_TABLE_MAX_KEY_SIZE]; + if ((len = grn_column_name(ctx, obj, namebuf, GRN_TABLE_MAX_KEY_SIZE))) { + GRN_TEXT_PUT(ctx, buf, namebuf, len); + } else { + grn_text_otoj(ctx, buf, obj, NULL); + } +} + +static grn_rc +grn_expr_inspect_internal(grn_ctx *ctx, grn_obj *buf, grn_obj *expr) +{ + uint32_t i, j; + grn_expr_var *var; + grn_expr_code *code; + grn_expr *e = (grn_expr *)expr; + grn_hash *vars = grn_expr_get_vars(ctx, expr, &i); + GRN_TEXT_PUTS(ctx, buf, "noname"); + GRN_TEXT_PUTC(ctx, buf, '('); + { + int i = 0; + grn_obj *value; + const char *name; + uint32_t name_len; + GRN_HASH_EACH(ctx, vars, id, &name, &name_len, &value, { + if (i++) { GRN_TEXT_PUTC(ctx, buf, ','); } + GRN_TEXT_PUT(ctx, buf, name, name_len); + GRN_TEXT_PUTC(ctx, buf, ':'); + put_value(ctx, buf, value); + }); + } + GRN_TEXT_PUTC(ctx, buf, ')'); + GRN_TEXT_PUTC(ctx, buf, '{'); + for (j = 0, code = e->codes; j < e->codes_curr; j++, code++) { + if (j) { GRN_TEXT_PUTC(ctx, buf, ','); } + grn_text_itoa(ctx, buf, code->modify); + if (code->op == GRN_OP_PUSH) { + for (i = 0, var = e->vars; i < e->nvars; i++, var++) { + if (&var->value == code->value) { + GRN_TEXT_PUTC(ctx, buf, '?'); + if (var->name_size) { + GRN_TEXT_PUT(ctx, buf, var->name, var->name_size); + } else { + grn_text_itoa(ctx, buf, (int)i); + } + break; + } + } + if (i == e->nvars) { + put_value(ctx, buf, code->value); + } + } else { + if (code->value) { + put_value(ctx, buf, code->value); + GRN_TEXT_PUTC(ctx, buf, ' '); + } + GRN_TEXT_PUTS(ctx, buf, opstrs[code->op]); + } + } + GRN_TEXT_PUTC(ctx, buf, '}'); + return GRN_SUCCESS; +} #define EXPRLOG(name,expr) do {\ grn_obj strbuf;\ GRN_TEXT_INIT(&strbuf, 0);\ - grn_expr_inspect(ctx, &strbuf, (expr));\ + grn_expr_inspect_internal(ctx, &strbuf, (expr));\ GRN_TEXT_PUTC(ctx, &strbuf, '\0');\ GRN_LOG(ctx, GRN_LOG_NOTICE, "%s=(%s)", (name), GRN_TEXT_VALUE(&strbuf));\ GRN_OBJ_FIN(ctx, &strbuf);\ } while (0) +#endif + static void -scan_info_put_index(grn_ctx *ctx, scan_info *si, grn_obj *index, uint32_t sid, int32_t weight) +scan_info_put_index(grn_ctx *ctx, scan_info *si, + grn_obj *index, uint32_t sid, int32_t weight, + grn_obj *scorer, + grn_obj *scorer_args_expr, + uint32_t scorer_args_expr_offset) { GRN_PTR_PUT(ctx, &si->index, index); GRN_UINT32_PUT(ctx, &si->wv, sid); GRN_INT32_PUT(ctx, &si->wv, weight); + GRN_PTR_PUT(ctx, &si->scorers, scorer); + GRN_PTR_PUT(ctx, &si->scorer_args_exprs, scorer_args_expr); + GRN_UINT32_PUT(ctx, &si->scorer_args_expr_offsets, scorer_args_expr_offset); { int i, ni = (GRN_BULK_VSIZE(&si->index) / sizeof(grn_obj *)) - 1; grn_obj **pi = &GRN_PTR_VALUE_AT(&si->index, ni); @@ -4191,10 +3811,10 @@ scan_info_put_index(grn_ctx *ctx, scan_info *si, grn_obj *index, uint32_t sid, i if (index == pi[-1]) { if (i) { int32_t *pw = &GRN_INT32_VALUE_AT(&si->wv, (ni - i) * 2); - memmove(pw + 2, pw, sizeof(int32_t) * 2 * i); + grn_memmove(pw + 2, pw, sizeof(int32_t) * 2 * i); pw[0] = (int32_t) sid; pw[1] = weight; - memmove(pi + 1, pi, sizeof(grn_obj *) * i); + grn_memmove(pi + 1, pi, sizeof(grn_obj *) * i); pi[0] = index; } return; @@ -4204,10 +3824,13 @@ scan_info_put_index(grn_ctx *ctx, scan_info *si, grn_obj *index, uint32_t sid, i } static int32_t -get_weight(grn_ctx *ctx, grn_expr_code *ec) +get_weight(grn_ctx *ctx, grn_expr_code *ec, uint32_t *offset) { if (ec->modify == 2 && ec[2].op == GRN_OP_STAR && ec[1].value && ec[1].value->header.type == GRN_BULK) { + if (offset) { + *offset = 2; + } if (ec[1].value->header.domain == GRN_DB_INT32 || ec[1].value->header.domain == GRN_DB_UINT32) { return GRN_INT32_VALUE(ec[1].value); @@ -4222,6 +3845,9 @@ get_weight(grn_ctx *ctx, grn_expr_code *ec) return weight; } } else { + if (offset) { + *offset = 0; + } return 1; } } @@ -4243,6 +3869,9 @@ grn_scan_info_open(grn_ctx *ctx, int start) si->max_interval = DEFAULT_MAX_INTERVAL; si->similarity_threshold = DEFAULT_SIMILARITY_THRESHOLD; si->start = start; + GRN_PTR_INIT(&si->scorers, GRN_OBJ_VECTOR, GRN_ID_NIL); + GRN_PTR_INIT(&si->scorer_args_exprs, GRN_OBJ_VECTOR, GRN_ID_NIL); + GRN_UINT32_INIT(&si->scorer_args_expr_offsets, GRN_OBJ_VECTOR); return si; } @@ -4254,9 +3883,16 @@ grn_scan_info_close(grn_ctx *ctx, scan_info *si) } void -grn_scan_info_put_index(grn_ctx *ctx, scan_info *si, grn_obj *index, uint32_t sid, int32_t weight) +grn_scan_info_put_index(grn_ctx *ctx, scan_info *si, + grn_obj *index, uint32_t sid, int32_t weight, + grn_obj *scorer, + grn_obj *scorer_args_expr, + uint32_t scorer_args_expr_offset) { - scan_info_put_index(ctx, si, index, sid, weight); + scan_info_put_index(ctx, si, index, sid, weight, + scorer, + scorer_args_expr, + scorer_args_expr_offset); } scan_info ** @@ -4267,9 +3903,9 @@ grn_scan_info_put_logical_op(grn_ctx *ctx, scan_info **sis, int *ip, } int32_t -grn_expr_code_get_weight(grn_ctx *ctx, grn_expr_code *ec) +grn_expr_code_get_weight(grn_ctx *ctx, grn_expr_code *ec, uint32_t *offset) { - return get_weight(ctx, ec); + return get_weight(ctx, ec, offset); } int @@ -4347,7 +3983,7 @@ grn_scan_info_set_similarity_threshold(scan_info *si, int similarity_threshold) grn_bool grn_scan_info_push_arg(scan_info *si, grn_obj *arg) { - if (si->nargs >= 8) { + if (si->nargs >= GRN_SCAN_INFO_MAX_N_ARGS) { return GRN_FALSE; } @@ -4365,36 +4001,324 @@ grn_scan_info_get_arg(grn_ctx *ctx, scan_info *si, int i) } static uint32_t -scan_info_build_find_index_column_index(grn_ctx *ctx, - scan_info *si, - grn_expr_code *ec, - uint32_t n_rest_codes, - grn_operator op) +scan_info_build_match_expr_codes_find_index(grn_ctx *ctx, scan_info *si, + grn_expr *expr, uint32_t i, + grn_obj **index, + int *sid) { - uint32_t offset = 0; - grn_obj *index; + grn_expr_code *ec; + uint32_t offset = 1; + grn_index_datum index_datum; + unsigned int n_index_data = 0; + + ec = &(expr->codes[i]); + switch (ec->value->header.type) { + case GRN_ACCESSOR : + n_index_data = grn_column_find_index_data(ctx, ec->value, si->op, + &index_datum, 1); + if (n_index_data > 0) { + grn_accessor *a = (grn_accessor *)(ec->value); + *sid = index_datum.section; + if (a->next && a->obj != index_datum.index) { + *index = ec->value; + } else { + *index = index_datum.index; + } + } + break; + case GRN_COLUMN_FIX_SIZE : + case GRN_COLUMN_VAR_SIZE : + n_index_data = grn_column_find_index_data(ctx, ec->value, si->op, + &index_datum, 1); + if (n_index_data > 0) { + *index = index_datum.index; + *sid = index_datum.section; + } + break; + case GRN_COLUMN_INDEX : + { + uint32_t n_rest_codes; + + *index = ec->value; + + n_rest_codes = expr->codes_curr - i; + if (n_rest_codes >= 2 && + ec[1].value && + (ec[1].value->header.domain == GRN_DB_INT32 || + ec[1].value->header.domain == GRN_DB_UINT32) && + ec[2].op == GRN_OP_GET_MEMBER) { + if (ec[1].value->header.domain == GRN_DB_INT32) { + *sid = GRN_INT32_VALUE(ec[1].value) + 1; + } else { + *sid = GRN_UINT32_VALUE(ec[1].value) + 1; + } + offset += 2; + } + } + break; + default : + break; + } + + return offset; +} + +static uint32_t +scan_info_build_match_expr_codes(grn_ctx *ctx, scan_info *si, + grn_expr *expr, uint32_t i) +{ + grn_expr_code *ec; + grn_obj *index = NULL; int sid = 0; - int32_t weight = 0; - - index = ec->value; - if (n_rest_codes > 2 && - ec[1].value && - (ec[1].value->header.domain == GRN_DB_INT32 || - ec[1].value->header.domain == GRN_DB_UINT32) && - ec[2].op == GRN_OP_GET_MEMBER) { - if (ec[1].value->header.domain == GRN_DB_INT32) { - sid = GRN_INT32_VALUE(ec[1].value) + 1; + uint32_t offset = 0; + + ec = &(expr->codes[i]); + if (!ec->value) { + return i + 1; + } + + switch (ec->value->header.type) { + case GRN_ACCESSOR : + case GRN_COLUMN_FIX_SIZE : + case GRN_COLUMN_VAR_SIZE : + case GRN_COLUMN_INDEX : + offset = scan_info_build_match_expr_codes_find_index(ctx, si, expr, i, + &index, &sid); + i += offset - 1; + if (index) { + if (ec->value->header.type == GRN_ACCESSOR) { + si->flags |= SCAN_ACCESSOR; + } + scan_info_put_index(ctx, si, index, sid, + get_weight(ctx, &(expr->codes[i]), &offset), + NULL, NULL, 0); + i += offset; + } + break; + case GRN_PROC : + if (!grn_obj_is_scorer_proc(ctx, ec->value)) { + grn_obj inspected; + GRN_TEXT_INIT(&inspected, 0); + grn_inspect(ctx, &inspected, ec->value); + ERR(GRN_INVALID_ARGUMENT, + "procedure must be scorer: <%.*s>", + (int)GRN_TEXT_LEN(&inspected), + GRN_TEXT_VALUE(&inspected)); + GRN_OBJ_FIN(ctx, &inspected); + return expr->codes_curr; + } + i++; + offset = scan_info_build_match_expr_codes_find_index(ctx, si, expr, i, + &index, &sid); + i += offset; + if (index) { + uint32_t scorer_args_expr_offset = 0; + if (expr->codes[i].op != GRN_OP_CALL) { + scorer_args_expr_offset = i; + } + while (i < expr->codes_curr && expr->codes[i].op != GRN_OP_CALL) { + i++; + } + scan_info_put_index(ctx, si, index, sid, + get_weight(ctx, &(expr->codes[i]), &offset), + ec->value, + (grn_obj *)expr, + scorer_args_expr_offset); + i += offset; + } + break; + default : + { + char name[GRN_TABLE_MAX_KEY_SIZE]; + int name_size; + name_size = grn_obj_name(ctx, ec->value, name, GRN_TABLE_MAX_KEY_SIZE); + ERR(GRN_INVALID_ARGUMENT, + "invalid match target: <%.*s>", + name_size, name); + return expr->codes_curr; + } + break; + } + + return i + 1; +} + +static void +scan_info_build_match_expr(grn_ctx *ctx, scan_info *si, grn_expr *expr) +{ + uint32_t i; + i = 0; + while (i < expr->codes_curr) { + i = scan_info_build_match_expr_codes(ctx, si, expr, i); + } +} + +static grn_bool +is_index_searchable_regexp(grn_ctx *ctx, grn_obj *regexp) +{ + const char *regexp_raw; + const char *regexp_raw_end; + grn_bool escaping = GRN_FALSE; + + if (!(regexp->header.domain == GRN_DB_SHORT_TEXT || + regexp->header.domain == GRN_DB_TEXT || + regexp->header.domain == GRN_DB_LONG_TEXT)) { + return GRN_FALSE; + } + + regexp_raw = GRN_TEXT_VALUE(regexp); + regexp_raw_end = regexp_raw + GRN_TEXT_LEN(regexp); + + while (regexp_raw < regexp_raw_end) { + unsigned int char_len; + + char_len = grn_charlen(ctx, regexp_raw, regexp_raw_end); + if (char_len == 0) { + return GRN_FALSE; + } + + if (char_len == 1) { + if (escaping) { + escaping = GRN_FALSE; + switch (regexp_raw[0]) { + case 'Z' : + case 'b' : + case 'B' : + case 'd' : + case 'D' : + case 'h' : + case 'H' : + case 'p' : + case 's' : + case 'S' : + case 'w' : + case 'W' : + case 'X' : + case 'k' : + case 'g' : + case '1' : + case '2' : + case '3' : + case '4' : + case '5' : + case '6' : + case '7' : + case '8' : + case '9' : + return GRN_FALSE; + default : + break; + } + } else { + switch (regexp_raw[0]) { + case '.' : + case '[' : + case ']' : + case '|' : + case '?' : + case '+' : + case '*' : + case '{' : + case '}' : + case '^' : + case '$' : + case '(' : + case ')' : + escaping = GRN_FALSE; + return GRN_FALSE; + case '\\' : + escaping = GRN_TRUE; + break; + default : + escaping = GRN_FALSE; + break; + } + } } else { - sid = GRN_UINT32_VALUE(ec[1].value) + 1; + escaping = GRN_FALSE; } - offset = 2; - weight = get_weight(ctx, ec + offset); - } else { - weight = get_weight(ctx, ec); + + regexp_raw += char_len; } - scan_info_put_index(ctx, si, index, sid, weight); - return offset; + return GRN_TRUE; +} + +static void +scan_info_build_match(grn_ctx *ctx, scan_info *si) +{ + grn_obj **p, **pe; + + if (si->op == GRN_OP_REGEXP) { + p = si->args; + pe = si->args + si->nargs; + for (; p < pe; p++) { + if ((*p)->header.type == GRN_BULK && + !is_index_searchable_regexp(ctx, *p)) { + return; + } + } + } + + p = si->args; + pe = si->args + si->nargs; + for (; p < pe; p++) { + if ((*p)->header.type == GRN_EXPR) { + scan_info_build_match_expr(ctx, si, (grn_expr *)(*p)); + } else if (GRN_DB_OBJP(*p)) { + grn_index_datum index_datum; + unsigned int n_index_data; + n_index_data = grn_column_find_index_data(ctx, *p, si->op, + &index_datum, 1); + if (n_index_data > 0) { + scan_info_put_index(ctx, si, + index_datum.index, index_datum.section, 1, + NULL, NULL, 0); + } + } else if (GRN_ACCESSORP(*p)) { + grn_index_datum index_datum; + unsigned int n_index_data; + si->flags |= SCAN_ACCESSOR; + n_index_data = grn_column_find_index_data(ctx, *p, si->op, + &index_datum, 1); + if (n_index_data > 0) { + grn_obj *index; + if (((grn_accessor *)(*p))->next) { + index = *p; + } else { + index = index_datum.index; + } + scan_info_put_index(ctx, si, + index, index_datum.section, 1, + NULL, NULL, 0); + } + } else { + switch (si->op) { + case GRN_OP_NEAR : + case GRN_OP_NEAR2 : + if (si->nargs == 3 && + *p == si->args[2] && + (*p)->header.domain == GRN_DB_INT32) { + si->max_interval = GRN_INT32_VALUE(*p); + } else { + si->query = *p; + } + break; + case GRN_OP_SIMILAR : + if (si->nargs == 3 && + *p == si->args[2] && + (*p)->header.domain == GRN_DB_INT32) { + si->similarity_threshold = GRN_INT32_VALUE(*p); + } else { + si->query = *p; + } + break; + default : + si->query = *p; + break; + } + } + } } static scan_info ** @@ -4431,10 +4355,26 @@ scan_info_build(grn_ctx *ctx, grn_obj *expr, int *n, case GRN_OP_GEO_WITHINP6 : case GRN_OP_GEO_WITHINP8 : case GRN_OP_TERM_EXTRACT : + case GRN_OP_REGEXP : if (stat < SCAN_COL1 || SCAN_CONST < stat) { return NULL; } stat = SCAN_START; m++; break; + case GRN_OP_BITWISE_OR : + case GRN_OP_BITWISE_XOR : + case GRN_OP_BITWISE_AND : + case GRN_OP_BITWISE_NOT : + case GRN_OP_SHIFTL : + case GRN_OP_SHIFTR : + case GRN_OP_SHIFTRR : + case GRN_OP_PLUS : + case GRN_OP_MINUS : + case GRN_OP_STAR : + case GRN_OP_MOD : + if (stat < SCAN_COL1 || SCAN_CONST < stat) { return NULL; } + stat = SCAN_START; + if (m != o + 1) { return NULL; } + break; case GRN_OP_AND : case GRN_OP_OR : case GRN_OP_AND_NOT : @@ -4496,90 +4436,17 @@ scan_info_build(grn_ctx *ctx, grn_obj *expr, int *n, case GRN_OP_GEO_WITHINP6 : case GRN_OP_GEO_WITHINP8 : case GRN_OP_TERM_EXTRACT : + case GRN_OP_REGEXP : stat = SCAN_START; si->op = c->op; si->end = c - e->codes; sis[i++] = si; - { - int sid; - grn_obj *index, **p = si->args, **pe = si->args + si->nargs; - for (; p < pe; p++) { - if ((*p)->header.type == GRN_EXPR) { - uint32_t j; - grn_expr_code *ec; - grn_expr *e = (grn_expr *)(*p); - for (j = e->codes_curr, ec = e->codes; j--; ec++) { - if (ec->value) { - switch (ec->value->header.type) { - case GRN_ACCESSOR : - if (grn_column_index(ctx, ec->value, c->op, &index, 1, &sid)) { - int32_t weight = get_weight(ctx, ec); - si->flags |= SCAN_ACCESSOR; - if (((grn_accessor *)ec->value)->next) { - scan_info_put_index(ctx, si, ec->value, sid, weight); - } else { - scan_info_put_index(ctx, si, index, sid, weight); - } - } - break; - case GRN_COLUMN_FIX_SIZE : - case GRN_COLUMN_VAR_SIZE : - if (grn_column_index(ctx, ec->value, c->op, &index, 1, &sid)) { - scan_info_put_index(ctx, si, index, sid, get_weight(ctx, ec)); - } - break; - case GRN_COLUMN_INDEX : - { - uint32_t offset; - offset = scan_info_build_find_index_column_index(ctx, si, ec, - j, c->op); - j -= offset; - ec += offset; - } - break; - } - } - } - } else if (GRN_DB_OBJP(*p)) { - if (grn_column_index(ctx, *p, c->op, &index, 1, &sid)) { - scan_info_put_index(ctx, si, index, sid, 1); - } - } else if (GRN_ACCESSORP(*p)) { - si->flags |= SCAN_ACCESSOR; - if (grn_column_index(ctx, *p, c->op, &index, 1, &sid)) { - if (((grn_accessor *)(*p))->next) { - scan_info_put_index(ctx, si, *p, sid, 1); - } else { - scan_info_put_index(ctx, si, index, sid, 1); - } - } - } else { - switch (c->op) { - case GRN_OP_NEAR : - case GRN_OP_NEAR2 : - if (si->nargs == 3 && - *p == si->args[2] && - (*p)->header.domain == GRN_DB_INT32) { - si->max_interval = GRN_INT32_VALUE(*p); - } else { - si->query = *p; - } - break; - case GRN_OP_SIMILAR : - if (si->nargs == 3 && - *p == si->args[2] && - (*p)->header.domain == GRN_DB_INT32) { - si->similarity_threshold = GRN_INT32_VALUE(*p); - } else { - si->query = *p; - } - break; - default : - si->query = *p; - break; - } - } - } + scan_info_build_match(ctx, si); + if (ctx->rc != GRN_SUCCESS) { + int j; + for (j = 0; j < i; j++) { SI_FREE(sis[j]); } + GRN_FREE(sis); + return NULL; } si = NULL; break; @@ -4595,7 +4462,7 @@ scan_info_build(grn_ctx *ctx, grn_obj *expr, int *n, if (c->value == var) { stat = SCAN_VAR; } else { - if (si->nargs < 8) { + if (si->nargs < GRN_SCAN_INFO_MAX_N_ARGS) { si->args[si->nargs++] = c->value; } if (stat == SCAN_START) { si->flags |= SCAN_PRE_CONST; } @@ -4610,7 +4477,7 @@ scan_info_build(grn_ctx *ctx, grn_obj *expr, int *n, case SCAN_CONST : case SCAN_VAR : stat = SCAN_COL1; - if (si->nargs < 8) { + if (si->nargs < GRN_SCAN_INFO_MAX_N_ARGS) { si->args[si->nargs++] = c->value; } break; @@ -4628,6 +4495,7 @@ scan_info_build(grn_ctx *ctx, grn_obj *expr, int *n, "invalid expression: can't use column as a value: %.*s", (int)GRN_TEXT_LEN(&inspected), GRN_TEXT_VALUE(&inspected)); GRN_OBJ_FIN(ctx, &inspected); + SI_FREE(si); for (j = 0; j < i; j++) { SI_FREE(sis[j]); } GRN_FREE(sis); return NULL; @@ -4649,17 +4517,28 @@ scan_info_build(grn_ctx *ctx, grn_obj *expr, int *n, sis[i++] = si; /* better index resolving framework for functions should be implemented */ { - int sid; - grn_obj *index, **p = si->args, **pe = si->args + si->nargs; + grn_obj **p = si->args, **pe = si->args + si->nargs; for (; p < pe; p++) { if (GRN_DB_OBJP(*p)) { - if (grn_column_index(ctx, *p, c->op, &index, 1, &sid)) { - scan_info_put_index(ctx, si, index, sid, 1); + grn_index_datum index_datum; + unsigned int n_index_data; + n_index_data = grn_column_find_index_data(ctx, *p, c->op, + &index_datum, 1); + if (n_index_data > 0) { + scan_info_put_index(ctx, si, + index_datum.index, index_datum.section, 1, + NULL, NULL, 0); } } else if (GRN_ACCESSORP(*p)) { + grn_index_datum index_datum; + unsigned int n_index_data; si->flags |= SCAN_ACCESSOR; - if (grn_column_index(ctx, *p, c->op, &index, 1, &sid)) { - scan_info_put_index(ctx, si, index, sid, 1); + n_index_data = grn_column_find_index_data(ctx, *p, c->op, + &index_datum, 1); + if (n_index_data > 0) { + scan_info_put_index(ctx, si, + index_datum.index, index_datum.section, 1, + NULL, NULL, 0); } } else { si->query = *p; @@ -4694,6 +4573,43 @@ scan_info_build(grn_ctx *ctx, grn_obj *expr, int *n, return sis; } +void +grn_inspect_scan_info_list(grn_ctx *ctx, grn_obj *buffer, scan_info **sis, int n) +{ + int i; + + for (i = 0; i < n; i++) { + scan_info *si = sis[i]; + + grn_text_printf(ctx, buffer, "[%d]\n", i); + grn_text_printf(ctx, buffer, + " op: <%s>\n", + grn_operator_to_string(si->op)); + grn_text_printf(ctx, buffer, + " logical_op: <%s>\n", + grn_operator_to_string(si->logical_op)); + + GRN_TEXT_PUTS(ctx, buffer, " query: <"); + grn_inspect(ctx, buffer, si->query); + GRN_TEXT_PUTS(ctx, buffer, ">\n"); + + grn_text_printf(ctx, buffer, + " expr: <%d..%d>\n", si->start, si->end); + } +} + +void +grn_p_scan_info_list(grn_ctx *ctx, scan_info **sis, int n) +{ + grn_obj inspected; + GRN_TEXT_INIT(&inspected, 0); + grn_inspect_scan_info_list(ctx, &inspected, sis, n); + printf("%.*s\n", + (int)GRN_TEXT_LEN(&inspected), + GRN_TEXT_VALUE(&inspected)); + GRN_OBJ_FIN(ctx, &inspected); +} + inline static int32_t exec_result_to_score(grn_ctx *ctx, grn_obj *result, grn_obj *score_buffer) { @@ -4719,8 +4635,8 @@ exec_result_to_score(grn_ctx *ctx, grn_obj *result, grn_obj *score_buffer) } static void -grn_table_select_(grn_ctx *ctx, grn_obj *table, grn_obj *expr, grn_obj *v, - grn_obj *res, grn_operator op) +grn_table_select_sequential(grn_ctx *ctx, grn_obj *table, grn_obj *expr, + grn_obj *v, grn_obj *res, grn_operator op) { int32_t score; grn_id id, *idp; @@ -4737,6 +4653,9 @@ grn_table_select_(grn_ctx *ctx, grn_obj *table, grn_obj *expr, grn_obj *v, while ((id = grn_table_cursor_next(ctx, tc))) { GRN_RECORD_SET(ctx, v, id); r = grn_expr_exec(ctx, expr, 0); + if (ctx->rc) { + break; + } score = exec_result_to_score(ctx, r, &score_buffer); if (score > 0) { grn_rset_recinfo *ri; @@ -4754,6 +4673,9 @@ grn_table_select_(grn_ctx *ctx, grn_obj *table, grn_obj *expr, grn_obj *v, grn_hash_cursor_get_key(ctx, hc, (void **) &idp); GRN_RECORD_SET(ctx, v, *idp); r = grn_expr_exec(ctx, expr, 0); + if (ctx->rc) { + break; + } score = exec_result_to_score(ctx, r, &score_buffer); if (score > 0) { grn_rset_recinfo *ri; @@ -4772,6 +4694,9 @@ grn_table_select_(grn_ctx *ctx, grn_obj *table, grn_obj *expr, grn_obj *v, grn_hash_cursor_get_key(ctx, hc, (void **) &idp); GRN_RECORD_SET(ctx, v, *idp); r = grn_expr_exec(ctx, expr, 0); + if (ctx->rc) { + break; + } score = exec_result_to_score(ctx, r, &score_buffer); if (score > 0) { grn_hash_cursor_delete(ctx, hc, NULL); @@ -4786,6 +4711,9 @@ grn_table_select_(grn_ctx *ctx, grn_obj *table, grn_obj *expr, grn_obj *v, grn_hash_cursor_get_key(ctx, hc, (void **) &idp); GRN_RECORD_SET(ctx, v, *idp); r = grn_expr_exec(ctx, expr, 0); + if (ctx->rc) { + break; + } score = exec_result_to_score(ctx, r, &score_buffer); if (score > 0) { grn_rset_recinfo *ri; @@ -4854,13 +4782,29 @@ grn_table_select_index_range_column(grn_ctx *ctx, grn_obj *table, min, min_size, max, max_size, offset, limit, flags); if (cursor) { - grn_id index_id; - while ((index_id = grn_table_cursor_next(ctx, cursor))) { - grn_ii_at(ctx, (grn_ii *)index, index_id, - (grn_hash *)res, logical_op); + uint32_t sid; + int32_t weight; + grn_obj *index_cursor; + + sid = GRN_UINT32_VALUE_AT(&(si->wv), 0); + weight = GRN_INT32_VALUE_AT(&(si->wv), 1); + index_cursor = grn_index_cursor_open(ctx, cursor, index, + GRN_ID_NIL, GRN_ID_MAX, 0); + if (index_cursor) { + grn_posting *posting; + while ((posting = grn_index_cursor_next(ctx, index_cursor, NULL))) { + if (sid == 0 || posting->sid == sid) { + grn_ii_posting ii_posting; + ii_posting.rid = posting->rid; + ii_posting.sid = posting->sid; + ii_posting.weight = posting->weight * weight; + grn_ii_posting_add(ctx, &ii_posting, (grn_hash *)res, logical_op); + } + } + processed = GRN_TRUE; + grn_obj_unlink(ctx, index_cursor); } grn_table_cursor_close(ctx, cursor); - processed = GRN_TRUE; } grn_ii_resolve_sel_and(ctx, (grn_hash *)res, logical_op); @@ -4913,11 +4857,21 @@ grn_table_select_index_range_accessor(grn_ctx *ctx, grn_obj *table, { int i; + grn_obj weight_vector; + grn_search_optarg optarg; + GRN_INT32_INIT(&weight_vector, GRN_OBJ_VECTOR); + memset(&optarg, 0, sizeof(grn_search_optarg)); + if (si->op == GRN_OP_MATCH) { + optarg.mode = GRN_OP_EXACT; + } else { + optarg.mode = si->op; + } for (i = n_accessors - 1; i > 0; i--) { grn_rc rc = GRN_SUCCESS; grn_accessor *accessor; grn_obj *index; + int section; grn_obj *domain; grn_obj *target; grn_obj *next_res; @@ -4926,10 +4880,34 @@ grn_table_select_index_range_accessor(grn_ctx *ctx, grn_obj *table, accessor = (grn_accessor *)GRN_PTR_VALUE_AT(accessor_stack, i - 1); target = accessor->obj; - if (grn_column_index(ctx, target, GRN_OP_EQUAL, &index, 1, NULL) == 0) { - grn_obj_unlink(ctx, current_res); - current_res = NULL; - break; + { + grn_index_datum index_datum; + unsigned int n_index_data; + n_index_data = grn_column_find_index_data(ctx, target, GRN_OP_EQUAL, + &index_datum, 1); + if (n_index_data == 0) { + grn_obj_unlink(ctx, current_res); + current_res = NULL; + break; + } + index = index_datum.index; + section = index_datum.section; + } + + if (section > 0) { + int j; + int weight_position = section - 1; + + GRN_BULK_REWIND(&weight_vector); + GRN_INT32_SET_AT(ctx, &weight_vector, weight_position, 1); + optarg.weight_vector = &(GRN_INT32_VALUE(&weight_vector)); + optarg.vector_size = GRN_BULK_VSIZE(&weight_vector) / sizeof(int32_t); + for (j = 0; j < weight_position - 1; j++) { + optarg.weight_vector[j] = 0; + } + } else { + optarg.weight_vector = NULL; + optarg.vector_size = 1; } { @@ -4954,14 +4932,14 @@ grn_table_select_index_range_accessor(grn_ctx *ctx, grn_obj *table, if (domain->header.type == GRN_TABLE_NO_KEY) { rc = grn_ii_sel(ctx, (grn_ii *)index, (const char *)next_record_id, sizeof(grn_id), - (grn_hash *)next_res, next_op, NULL); + (grn_hash *)next_res, next_op, &optarg); } else { char key[GRN_TABLE_MAX_KEY_SIZE]; int key_len; key_len = grn_table_get_key(ctx, domain, *next_record_id, key, GRN_TABLE_MAX_KEY_SIZE); rc = grn_ii_sel(ctx, (grn_ii *)index, key, key_len, - (grn_hash *)next_res, next_op, NULL); + (grn_hash *)next_res, next_op, &optarg); } if (rc != GRN_SUCCESS) { break; @@ -4986,6 +4964,7 @@ grn_table_select_index_range_accessor(grn_ctx *ctx, grn_obj *table, break; } } + GRN_OBJ_FIN(ctx, &weight_vector); } return current_res == res; @@ -5085,7 +5064,10 @@ grn_table_select_index(grn_ctx *ctx, grn_obj *table, scan_info *si, GRN_BULK_HEAD(si->query), GRN_BULK_VSIZE(si->query)); } - grn_ii_at(ctx, (grn_ii *)index, tid, (grn_hash *)res, si->logical_op); + if (tid != GRN_ID_NIL) { + grn_ii_at(ctx, (grn_ii *)index, tid, (grn_hash *)res, + si->logical_op); + } } grn_ii_resolve_sel_and(ctx, (grn_hash *)res, si->logical_op); processed = GRN_TRUE; @@ -5173,9 +5155,11 @@ grn_table_select_index(grn_ctx *ctx, grn_obj *table, scan_info *si, case GRN_OP_NEAR : case GRN_OP_NEAR2 : case GRN_OP_SIMILAR : + case GRN_OP_REGEXP : { grn_obj wv, **ip = &GRN_PTR_VALUE(&si->index); - int j = GRN_BULK_VSIZE(&si->index)/sizeof(grn_obj *); + int j; + int n_indexes = GRN_BULK_VSIZE(&si->index)/sizeof(grn_obj *); int32_t *wp = &GRN_INT32_VALUE(&si->wv); grn_search_optarg optarg; GRN_INT32_INIT(&wv, GRN_OBJ_VECTOR); @@ -5203,19 +5187,30 @@ grn_table_select_index(grn_ctx *ctx, grn_obj *table, scan_info *si, optarg.proc = NULL; optarg.max_size = 0; ctx->flags |= GRN_CTX_TEMPORARY_DISABLE_II_RESOLVE_SEL_AND; - for (; j--; ip++, wp += 2) { + for (j = 0; j < n_indexes; j++, ip++, wp += 2) { uint32_t sid = (uint32_t) wp[0]; int32_t weight = wp[1]; if (sid) { int weight_index = sid - 1; - GRN_INT32_SET_AT(ctx, &wv, weight_index, weight); + int current_vector_size; + current_vector_size = GRN_BULK_VSIZE(&wv)/sizeof(int32_t); + if (weight_index < current_vector_size) { + ((int *)GRN_BULK_HEAD(&wv))[weight_index] = weight; + } else { + GRN_INT32_SET_AT(ctx, &wv, weight_index, weight); + } optarg.weight_vector = &GRN_INT32_VALUE(&wv); optarg.vector_size = GRN_BULK_VSIZE(&wv)/sizeof(int32_t); } else { optarg.weight_vector = NULL; optarg.vector_size = weight; } - if (j) { + optarg.scorer = GRN_PTR_VALUE_AT(&(si->scorers), j); + optarg.scorer_args_expr = + GRN_PTR_VALUE_AT(&(si->scorer_args_exprs), j); + optarg.scorer_args_expr_offset = + GRN_UINT32_VALUE_AT(&(si->scorer_args_expr_offsets), j); + if (j < n_indexes - 1) { if (sid && ip[0] == ip[1]) { continue; } } else { ctx->flags &= ~GRN_CTX_TEMPORARY_DISABLE_II_RESOLVE_SEL_AND; @@ -5250,7 +5245,7 @@ grn_table_select_index(grn_ctx *ctx, grn_obj *table, scan_info *si, } break; case GRN_OP_CALL : - if (selector_proc_p(si->args[0])) { + if (grn_obj_is_selector_proc(ctx, si->args[0])) { grn_rc rc; grn_proc *proc = (grn_proc *)(si->args[0]); rc = proc->selector(ctx, table, index, si->nargs, si->args, @@ -5276,7 +5271,7 @@ grn_table_select_index(grn_ctx *ctx, grn_obj *table, scan_info *si, } else { switch (si->op) { case GRN_OP_CALL : - if (selector_proc_p(si->args[0])) { + if (grn_obj_is_selector_proc(ctx, si->args[0])) { grn_rc rc; grn_proc *proc = (grn_proc *)(si->args[0]); rc = proc->selector(ctx, table, NULL, si->nargs, si->args, @@ -5361,11 +5356,13 @@ grn_table_select(grn_ctx *ctx, grn_obj *table, grn_obj *expr, if (ctx->rc) { break; } e->codes = codes + si->start; e->codes_curr = si->end - si->start + 1; - grn_table_select_(ctx, table, expr, v, res, si->logical_op); + grn_table_select_sequential(ctx, table, expr, v, + res, si->logical_op); } } GRN_QUERY_LOG(ctx, GRN_QUERY_LOG_SIZE, ":", "filter(%d)", grn_table_size(ctx, res)); + if (ctx->rc) { break; } } for (i = 0; i < n; i++) { scan_info *si = sis[i]; @@ -5377,7 +5374,7 @@ grn_table_select(grn_ctx *ctx, grn_obj *table, grn_obj *expr, e->codes_curr = codes_curr; } else { if (!ctx->rc) { - grn_table_select_(ctx, table, expr, v, res, op); + grn_table_select_sequential(ctx, table, expr, v, res, op); } } } @@ -5408,7 +5405,7 @@ grn_int32_value_at(grn_obj *obj, int offset) /* grn_expr_create_from_str */ -#include "snip.h" +#include "grn_snip.h" typedef struct { grn_ctx *ctx; @@ -5457,239 +5454,6 @@ skip_space(grn_ctx *ctx, efs_info *q) } } -static grn_rc get_expr(grn_ctx *ctx, efs_info *q, grn_obj *column, grn_operator mode); -static grn_rc get_token(grn_ctx *ctx, efs_info *q, efs_op *op, grn_obj *column, grn_operator mode); - -static grn_rc -get_phrase(grn_ctx *ctx, efs_info *q, grn_obj *column, int mode, int option) -{ - const char *start, *s; - start = s = q->cur; - GRN_BULK_REWIND(&q->buf); - while (1) { - unsigned int len; - if (s >= q->str_end) { - q->cur = s; - break; - } - len = grn_charlen(ctx, s, q->str_end); - if (len == 0) { - /* invalid string containing malformed multibyte char */ - return GRN_END_OF_DATA; - } else if (len == 1) { - if (*s == GRN_QUERY_QUOTER) { - q->cur = s + 1; - break; - } else if (*s == GRN_QUERY_ESCAPE && s + 1 < q->str_end) { - s++; - len = grn_charlen(ctx, s, q->str_end); - } - } - GRN_TEXT_PUT(ctx, &q->buf, s, len); - s += len; - } - grn_expr_append_obj(ctx, q->e, q->v, GRN_OP_PUSH, 1); - grn_expr_append_const(ctx, q->e, column, GRN_OP_PUSH, 1); - grn_expr_append_op(ctx, q->e, GRN_OP_GET_VALUE, 2); - grn_expr_append_const(ctx, q->e, &q->buf, GRN_OP_PUSH, 1); - if (mode == GRN_OP_MATCH || mode == GRN_OP_EXACT) { - grn_expr_append_op(ctx, q->e, mode, 2); - } else { - grn_expr_append_const_int(ctx, q->e, option, GRN_OP_PUSH, 1); - grn_expr_append_op(ctx, q->e, mode, 3); - } - return GRN_SUCCESS; -} - -static grn_rc -get_geocond(grn_ctx *ctx, efs_info *q, grn_obj *longitude, grn_obj *latitude) -{ - unsigned int len; - const char *start = q->cur, *end; - for (end = q->cur;; ) { - /* null check and length check */ - if (!(len = grn_charlen(ctx, end, q->str_end))) { - q->cur = q->str_end; - break; - } - if (grn_isspace(end, ctx->encoding) || - *end == GRN_QUERY_PARENR) { - q->cur = end; - break; - } - } - { - const char *tokbuf[8]; - int32_t lng0, lat0, lng1, lat1, lng2, lat2, r; - int32_t n = grn_str_tok((char *)start, end - start, ',', tokbuf, 8, NULL); - switch (n) { - case 3 : - lng0 = grn_atoi(start, tokbuf[0], NULL); - lat0 = grn_atoi(tokbuf[0] + 1, tokbuf[1], NULL); - r = grn_atoi(tokbuf[1] + 1, tokbuf[2], NULL); - grn_expr_append_obj(ctx, q->e, q->v, GRN_OP_PUSH, 1); - grn_expr_append_const(ctx, q->e, longitude, GRN_OP_PUSH, 1); - grn_expr_append_op(ctx, q->e, GRN_OP_GET_VALUE, 2); - grn_expr_append_obj(ctx, q->e, q->v, GRN_OP_PUSH, 1); - grn_expr_append_const(ctx, q->e, latitude, GRN_OP_PUSH, 1); - grn_expr_append_op(ctx, q->e, GRN_OP_GET_VALUE, 2); - grn_expr_append_const_int(ctx, q->e, lng0, GRN_OP_PUSH, 1); - grn_expr_append_const_int(ctx, q->e, lat0, GRN_OP_PUSH, 1); - grn_expr_append_const_int(ctx, q->e, r, GRN_OP_PUSH, 1); - grn_expr_append_op(ctx, q->e, GRN_OP_GEO_WITHINP5, 5); - break; - case 4 : - lng0 = grn_atoi(start, tokbuf[0], NULL); - lat0 = grn_atoi(tokbuf[0] + 1, tokbuf[1], NULL); - lng1 = grn_atoi(tokbuf[1] + 1, tokbuf[2], NULL); - lat1 = grn_atoi(tokbuf[2] + 1, tokbuf[3], NULL); - grn_expr_append_obj(ctx, q->e, q->v, GRN_OP_PUSH, 1); - grn_expr_append_const(ctx, q->e, longitude, GRN_OP_PUSH, 1); - grn_expr_append_op(ctx, q->e, GRN_OP_GET_VALUE, 2); - grn_expr_append_obj(ctx, q->e, q->v, GRN_OP_PUSH, 1); - grn_expr_append_const(ctx, q->e, latitude, GRN_OP_PUSH, 1); - grn_expr_append_op(ctx, q->e, GRN_OP_GET_VALUE, 2); - grn_expr_append_const_int(ctx, q->e, lng0, GRN_OP_PUSH, 1); - grn_expr_append_const_int(ctx, q->e, lat0, GRN_OP_PUSH, 1); - grn_expr_append_const_int(ctx, q->e, lng1, GRN_OP_PUSH, 1); - grn_expr_append_const_int(ctx, q->e, lat1, GRN_OP_PUSH, 1); - grn_expr_append_op(ctx, q->e, GRN_OP_GEO_WITHINP6, 6); - break; - case 6 : - lng0 = grn_atoi(start, tokbuf[0], NULL); - lat0 = grn_atoi(tokbuf[0] + 1, tokbuf[1], NULL); - lng1 = grn_atoi(tokbuf[1] + 1, tokbuf[2], NULL); - lat1 = grn_atoi(tokbuf[2] + 1, tokbuf[3], NULL); - lng2 = grn_atoi(tokbuf[3] + 1, tokbuf[4], NULL); - lat2 = grn_atoi(tokbuf[4] + 1, tokbuf[5], NULL); - grn_expr_append_obj(ctx, q->e, q->v, GRN_OP_PUSH, 1); - grn_expr_append_const(ctx, q->e, longitude, GRN_OP_PUSH, 1); - grn_expr_append_op(ctx, q->e, GRN_OP_GET_VALUE, 2); - grn_expr_append_obj(ctx, q->e, q->v, GRN_OP_PUSH, 1); - grn_expr_append_const(ctx, q->e, latitude, GRN_OP_PUSH, 1); - grn_expr_append_op(ctx, q->e, GRN_OP_GET_VALUE, 2); - grn_expr_append_const_int(ctx, q->e, lng0, GRN_OP_PUSH, 1); - grn_expr_append_const_int(ctx, q->e, lat0, GRN_OP_PUSH, 1); - grn_expr_append_const_int(ctx, q->e, lng1, GRN_OP_PUSH, 1); - grn_expr_append_const_int(ctx, q->e, lat1, GRN_OP_PUSH, 1); - grn_expr_append_const_int(ctx, q->e, lng2, GRN_OP_PUSH, 1); - grn_expr_append_const_int(ctx, q->e, lat2, GRN_OP_PUSH, 1); - grn_expr_append_op(ctx, q->e, GRN_OP_GEO_WITHINP8, 8); - break; - default : - ERR(GRN_INVALID_ARGUMENT, "invalid geocond"); - break; - } - } - return ctx->rc; -} - -static grn_rc -get_word(grn_ctx *ctx, efs_info *q, grn_obj *column, int mode, int option) -{ - const char *start = q->cur, *end; - unsigned int len; - for (end = q->cur;; ) { - /* null check and length check */ - if (!(len = grn_charlen(ctx, end, q->str_end))) { - q->cur = q->str_end; - break; - } - if (grn_isspace(end, ctx->encoding) || - *end == GRN_QUERY_PARENR) { - q->cur = end; - break; - } - if (*end == GRN_QUERY_COLUMN) { - grn_obj *c = grn_obj_column(ctx, q->table, start, end - start); - if (c && end + 1 < q->str_end) { - efs_op op; - switch (end[1]) { - case '!' : - mode = GRN_OP_NOT_EQUAL; - q->cur = end + 2; - break; - case '=' : - if (q->flags & GRN_EXPR_ALLOW_UPDATE) { - mode = GRN_OP_ASSIGN; - q->cur = end + 2; - } else { - get_token(ctx, q, &op, c, mode); - } - break; - case '<' : - if (end + 2 < q->str_end && end[2] == '=') { - mode = GRN_OP_LESS_EQUAL; - q->cur = end + 3; - } else { - mode = GRN_OP_LESS; - q->cur = end + 2; - } - break; - case '>' : - if (end + 2 < q->str_end && end[2] == '=') { - mode = GRN_OP_GREATER_EQUAL; - q->cur = end + 3; - } else { - mode = GRN_OP_GREATER; - q->cur = end + 2; - } - break; - case '%' : - mode = GRN_OP_MATCH; - q->cur = end + 2; - break; - case '@' : - q->cur = end + 2; - return get_geocond(ctx, q, column, c); - break; - default : - mode = GRN_OP_EQUAL; - q->cur = end + 1; - break; - } - return get_token(ctx, q, &op, c, mode); - } else { - ERR(GRN_INVALID_ARGUMENT, "column lookup failed"); - return ctx->rc; - } - } else if (*end == GRN_QUERY_PREFIX) { - mode = GRN_OP_PREFIX; - q->cur = end + 1; - break; - } - end += len; - } - if (!column) { - ERR(GRN_INVALID_ARGUMENT, "column missing"); - return ctx->rc; - } - if (mode == GRN_OP_ASSIGN) { - grn_expr_append_obj(ctx, q->e, q->v, GRN_OP_PUSH, 1); - grn_expr_append_const(ctx, q->e, column, GRN_OP_PUSH, 1); - grn_expr_append_const_str(ctx, q->e, start, end - start, GRN_OP_PUSH, 1); - grn_expr_append_op(ctx, q->e, GRN_OP_ASSIGN, 2); - } else { - grn_expr_append_obj(ctx, q->e, q->v, GRN_OP_PUSH, 1); - grn_expr_append_const(ctx, q->e, column, GRN_OP_PUSH, 1); - grn_expr_append_op(ctx, q->e, GRN_OP_GET_VALUE, 2); - grn_expr_append_const_str(ctx, q->e, start, end - start, GRN_OP_PUSH, 1); - switch (mode) { - case GRN_OP_NEAR : - case GRN_OP_NEAR2 : - case GRN_OP_SIMILAR : - case GRN_OP_TERM_EXTRACT : - grn_expr_append_const_int(ctx, q->e, option, GRN_OP_PUSH, 1); - grn_expr_append_op(ctx, q->e, mode, 3); - break; - default : - grn_expr_append_op(ctx, q->e, mode, 2); - break; - } - } - return GRN_SUCCESS; -} - static grn_bool get_op(efs_info *q, efs_op *op, grn_operator *mode, int *option) { @@ -5738,90 +5502,6 @@ get_op(efs_info *q, efs_op *op, grn_operator *mode, int *option) return found; } -static grn_rc -get_token(grn_ctx *ctx, efs_info *q, efs_op *op, grn_obj *column, grn_operator mode) -{ - int option = 0; - op->op = q->default_op; - op->weight = DEFAULT_WEIGHT; - for (;;) { - skip_space(ctx, q); - if (q->cur >= q->str_end) { return GRN_END_OF_DATA; } - switch (*q->cur) { - case '\0' : - return GRN_END_OF_DATA; - break; - case GRN_QUERY_PARENR : - q->cur++; - return GRN_END_OF_DATA; - break; - case GRN_QUERY_QUOTEL : - q->cur++; - return get_phrase(ctx, q, column, mode, option); - break; - case GRN_QUERY_PREFIX : - q->cur++; - get_op(q, op, &mode, &option); - break; - case GRN_QUERY_AND : - q->cur++; - op->op = GRN_OP_AND; - break; - case GRN_QUERY_AND_NOT : - q->cur++; - op->op = GRN_OP_AND_NOT; - break; - case GRN_QUERY_ADJ_INC : - q->cur++; - if (op->weight < 127) { op->weight++; } - op->op = GRN_OP_ADJUST; - break; - case GRN_QUERY_ADJ_DEC : - q->cur++; - if (op->weight > -128) { op->weight--; } - op->op = GRN_OP_ADJUST; - break; - case GRN_QUERY_ADJ_NEG : - q->cur++; - op->op = GRN_OP_ADJUST; - op->weight = -1; - break; - case GRN_QUERY_PARENL : - q->cur++; - return get_expr(ctx, q, column, mode); - break; - case 'O' : - if (q->cur[1] == 'R' && q->cur[2] == ' ') { - q->cur += 2; - op->op = GRN_OP_OR; - break; - } - /* fallthru */ - default : - return get_word(ctx, q, column, mode, option); - break; - } - } - return GRN_SUCCESS; -} - -static grn_rc -get_expr(grn_ctx *ctx, efs_info *q, grn_obj *column, grn_operator mode) -{ - efs_op op; - grn_rc rc = get_token(ctx, q, &op, column, mode); - if (rc) { return rc; } - while (!(rc = get_token(ctx, q, &op, column, mode))) { - if (op.op == GRN_OP_ADJUST) { - grn_expr_append_const_int(ctx, q->e, op.weight, GRN_OP_PUSH, 1); - grn_expr_append_op(ctx, q->e, op.op, 3); - } else { - grn_expr_append_op(ctx, q->e, op.op, 2); - } - } - return rc; -} - #define DISABLE_UNUSED_CODE 1 #ifndef DISABLE_UNUSED_CODE static const char * @@ -5936,8 +5616,8 @@ section_weight_cb(grn_ctx *ctx, grn_hash *r, const void *rid, int sid, void *arg } #endif -#include "ecmascript.h" -#include "ecmascript.c" +#include "grn_ecmascript.h" +#include "grn_ecmascript.c" static grn_rc grn_expr_parser_open(grn_ctx *ctx) @@ -6072,6 +5752,10 @@ get_word_(grn_ctx *ctx, efs_info *q) mode = GRN_OP_SUFFIX; q->cur = end + 2; break; + case '~' : + mode = GRN_OP_REGEXP; + q->cur = end + 2; + break; default : mode = GRN_OP_EQUAL; q->cur = end + 1; @@ -6090,7 +5774,7 @@ get_word_(grn_ctx *ctx, efs_info *q) GRN_INT32_PUT(ctx, &q->mode_stack, mode); return GRN_SUCCESS; - } else if (GRN_TEXT_LEN(&q->buf) > 1 && *end == GRN_QUERY_PREFIX) { + } else if (GRN_TEXT_LEN(&q->buf) > 0 && *end == GRN_QUERY_PREFIX) { q->cur = end + 1; GRN_INT32_PUT(ctx, &q->mode_stack, GRN_OP_PREFIX); break; @@ -6484,6 +6168,10 @@ parse_script(grn_ctx *ctx, efs_info *q) PARSE(GRN_EXPR_TOKEN_SUFFIX); q->cur += 2; break; + case '~' : + PARSE(GRN_EXPR_TOKEN_REGEXP); + q->cur += 2; + break; default : PARSE(GRN_EXPR_TOKEN_MATCH); q->cur++; @@ -6912,7 +6600,7 @@ grn_expr_parse(grn_ctx *ctx, grn_obj *expr, /* grn_obj strbuf; GRN_TEXT_INIT(&strbuf, 0); - grn_expr_inspect(ctx, &strbuf, expr); + grn_expr_inspect_internal(ctx, &strbuf, expr); GRN_TEXT_PUTC(ctx, &strbuf, '\0'); GRN_LOG(ctx, GRN_LOG_NOTICE, "query=(%s)", GRN_TEXT_VALUE(&strbuf)); GRN_OBJ_FIN(ctx, &strbuf); @@ -7156,3 +6844,63 @@ grn_expr_syntax_escape_query(grn_ctx *ctx, const char *query, int query_size, target_characters, GRN_QUERY_ESCAPE, escaped_query); } + +grn_rc +grn_expr_dump_plan(grn_ctx *ctx, grn_obj *expr, grn_obj *buffer) +{ + int n; + scan_info **sis; + + GRN_API_ENTER; + sis = scan_info_build(ctx, expr, &n, GRN_OP_OR, 0); + if (sis) { + int i; + grn_inspect_scan_info_list(ctx, buffer, sis, n); + for (i = 0; i < n; i++) { + SI_FREE(sis[i]); + } + GRN_FREE(sis); + } else { + GRN_TEXT_PUTS(ctx, buffer, "sequential search\n"); + } + GRN_API_RETURN(GRN_SUCCESS); +} + +static unsigned int +grn_expr_estimate_size_raw(grn_ctx *ctx, grn_obj *expr, grn_obj *table) +{ + return grn_table_size(ctx, table); +} + +unsigned int +grn_expr_estimate_size(grn_ctx *ctx, grn_obj *expr) +{ + grn_obj *table; + grn_obj *variable; + unsigned int size; + + variable = grn_expr_get_var_by_offset(ctx, expr, 0); + if (!variable) { + ERR(GRN_INVALID_ARGUMENT, "at least one variable must be defined"); + return 0; + } + + table = grn_ctx_at(ctx, variable->header.domain); + if (!table) { + ERR(GRN_INVALID_ARGUMENT, + "variable refers unknown domain: <%u>", variable->header.domain); + return 0; + } + + GRN_API_ENTER; +#ifdef GRN_WITH_MRUBY + if (ctx->impl->mrb.state) { + size = grn_mrb_expr_estimate_size(ctx, expr, table); + } else { + size = grn_expr_estimate_size_raw(ctx, expr, table); + } +#else + size = grn_expr_estimate_size_raw(ctx, expr, table); +#endif + GRN_API_RETURN(size); +} |