summary | refs | log | tree | commit | diff
path: root/storage/mroonga/vendor/groonga/lib/expr.c
diff options
context:
space:
mode:
Diffstat (limited to 'storage/mroonga/vendor/groonga/lib/expr.c')
-rw-r--r-- storage/mroonga/vendor/groonga/lib/expr.c 2132
1 files changed, 940 insertions, 1192 deletions
diff --git a/storage/mroonga/vendor/groonga/lib/expr.c b/storage/mroonga/vendor/groonga/lib/expr.c
index dd70ebecb40..f13502a0f14 100644
--- a/storage/mroonga/vendor/groonga/lib/expr.c
+++ b/storage/mroonga/vendor/groonga/lib/expr.c
@@ -1,6 +1,6 @@
/* -*- c-basic-offset: 2 -*- */
/*
- Copyright(C) 2010-2014 Brazil
+ Copyright(C) 2010-2015 Brazil
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
@@ -15,33 +15,17 @@
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
-#include "groonga_in.h"
-#include "db.h"
-#include "ctx_impl.h"
+#include "grn.h"
+#include "grn_db.h"
+#include "grn_ctx_impl.h"
#include <string.h>
-#include <float.h>
-#include "ii.h"
-#include "geo.h"
-#include "expr.h"
-#include "util.h"
-#include "normalizer_in.h"
-#include "mrb.h"
+#include "grn_ii.h"
+#include "grn_geo.h"
+#include "grn_expr.h"
+#include "grn_util.h"
+#include "grn_mrb.h"
#include "mrb/mrb_expr.h"
-static inline int
-function_proc_p(grn_obj *obj)
-{
- return (obj &&
- obj->header.type == GRN_PROC &&
- ((grn_proc *)obj)->type == GRN_PROC_FUNCTION);
-}
-
-static inline int
-selector_proc_p(grn_obj *obj)
-{
- return (function_proc_p(obj) && ((grn_proc *)obj)->selector);
-}
-
grn_obj *
grn_expr_alloc(grn_ctx *ctx, grn_obj *expr, grn_id domain, grn_obj_flags flags)
{
@@ -60,7 +44,7 @@ grn_expr_alloc(grn_ctx *ctx, grn_obj *expr, grn_id domain, grn_obj_flags flags)
return res;
}
-static grn_hash *
+grn_hash *
grn_expr_get_vars(grn_ctx *ctx, grn_obj *expr, unsigned int *nvars)
{
grn_hash *vars = NULL;
@@ -131,6 +115,18 @@ grn_proc_get_info(grn_ctx *ctx, grn_user_data *user_data,
}
grn_obj *
+grn_proc_get_vars(grn_ctx *ctx, grn_user_data *user_data) /* variable table of pctx->proc, or NULL when no proc is set */
+{
+ uint32_t n; /* out-parameter (nvars) required by grn_expr_get_vars(); the value is not used here */
+ grn_proc_ctx *pctx = (grn_proc_ctx *)user_data; /* user_data is read as a grn_proc_ctx */
+ if (pctx->proc) {
+ return (grn_obj *)grn_expr_get_vars(ctx, (grn_obj *)pctx->proc, &n); /* the grn_hash * is handed back typed as grn_obj * */
+ } else {
+ return NULL; /* pctx->proc is unset */
+ }
+}
+
+grn_obj *
grn_proc_get_var(grn_ctx *ctx, grn_user_data *user_data, const char *name, unsigned int name_size)
{
grn_proc_ctx *pctx = (grn_proc_ctx *)user_data;
@@ -170,7 +166,7 @@ grn_rc
grn_proc_set_selector(grn_ctx *ctx, grn_obj *proc, grn_selector_func selector)
{
grn_proc *proc_ = (grn_proc *)proc;
- if (!function_proc_p(proc)) {
+ if (!grn_obj_is_function_proc(ctx, proc)) {
return GRN_INVALID_ARGUMENT;
}
proc_->selector = selector;
@@ -179,151 +175,6 @@ grn_proc_set_selector(grn_ctx *ctx, grn_obj *proc, grn_selector_func selector)
/* grn_expr */
-static const char *opstrs[] = {
- "PUSH",
- "POP",
- "NOP",
- "CALL",
- "INTERN",
- "GET_REF",
- "GET_VALUE",
- "AND",
- "AND_NOT",
- "OR",
- "ASSIGN",
- "STAR_ASSIGN",
- "SLASH_ASSIGN",
- "MOD_ASSIGN",
- "PLUS_ASSIGN",
- "MINUS_ASSIGN",
- "SHIFTL_ASSIGN",
- "SHIFTR_ASSIGN",
- "SHIFTRR_ASSIGN",
- "AND_ASSIGN",
- "XOR_ASSIGN",
- "OR_ASSIGN",
- "JUMP",
- "CJUMP",
- "COMMA",
- "BITWISE_OR",
- "BITWISE_XOR",
- "BITWISE_AND",
- "BITWISE_NOT",
- "EQUAL",
- "NOT_EQUAL",
- "LESS",
- "GREATER",
- "LESS_EQUAL",
- "GREATER_EQUAL",
- "IN",
- "MATCH",
- "NEAR",
- "NEAR2",
- "SIMILAR",
- "TERM_EXTRACT",
- "SHIFTL",
- "SHIFTR",
- "SHIFTRR",
- "PLUS",
- "MINUS",
- "STAR",
- "SLASH",
- "MOD",
- "DELETE",
- "INCR",
- "DECR",
- "INCR_POST",
- "DECR_POST",
- "NOT",
- "ADJUST",
- "EXACT",
- "LCP",
- "PARTIAL",
- "UNSPLIT",
- "PREFIX",
- "SUFFIX",
- "GEO_DISTANCE1",
- "GEO_DISTANCE2",
- "GEO_DISTANCE3",
- "GEO_DISTANCE4",
- "GEO_WITHINP5",
- "GEO_WITHINP6",
- "GEO_WITHINP8",
- "OBJ_SEARCH",
- "EXPR_GET_VAR",
- "TABLE_CREATE",
- "TABLE_SELECT",
- "TABLE_SORT",
- "TABLE_GROUP",
- "JSON_PUT"
-};
-
-static void
-put_value(grn_ctx *ctx, grn_obj *buf, grn_obj *obj)
-{
- int len;
- char namebuf[GRN_TABLE_MAX_KEY_SIZE];
- if ((len = grn_column_name(ctx, obj, namebuf, GRN_TABLE_MAX_KEY_SIZE))) {
- GRN_TEXT_PUT(ctx, buf, namebuf, len);
- } else {
- grn_text_otoj(ctx, buf, obj, NULL);
- }
-}
-
-grn_rc
-grn_expr_inspect(grn_ctx *ctx, grn_obj *buf, grn_obj *expr)
-{
- uint32_t i, j;
- grn_expr_var *var;
- grn_expr_code *code;
- grn_expr *e = (grn_expr *)expr;
- grn_hash *vars = grn_expr_get_vars(ctx, expr, &i);
- GRN_TEXT_PUTS(ctx, buf, "noname");
- GRN_TEXT_PUTC(ctx, buf, '(');
- {
- int i = 0;
- grn_obj *value;
- const char *name;
- uint32_t name_len;
- GRN_HASH_EACH(ctx, vars, id, &name, &name_len, &value, {
- if (i++) { GRN_TEXT_PUTC(ctx, buf, ','); }
- GRN_TEXT_PUT(ctx, buf, name, name_len);
- GRN_TEXT_PUTC(ctx, buf, ':');
- put_value(ctx, buf, value);
- });
- }
- GRN_TEXT_PUTC(ctx, buf, ')');
- GRN_TEXT_PUTC(ctx, buf, '{');
- for (j = 0, code = e->codes; j < e->codes_curr; j++, code++) {
- if (j) { GRN_TEXT_PUTC(ctx, buf, ','); }
- grn_text_itoa(ctx, buf, code->modify);
- if (code->op == GRN_OP_PUSH) {
- for (i = 0, var = e->vars; i < e->nvars; i++, var++) {
- if (&var->value == code->value) {
- GRN_TEXT_PUTC(ctx, buf, '?');
- if (var->name_size) {
- GRN_TEXT_PUT(ctx, buf, var->name, var->name_size);
- } else {
- grn_text_itoa(ctx, buf, (int)i);
- }
- break;
- }
- }
- if (i == e->nvars) {
- put_value(ctx, buf, code->value);
- }
- } else {
- if (code->value) {
- put_value(ctx, buf, code->value);
- GRN_TEXT_PUTC(ctx, buf, ' ');
- }
- GRN_TEXT_PUTS(ctx, buf, opstrs[code->op]);
- }
- }
- GRN_TEXT_PUTC(ctx, buf, '}');
- return GRN_SUCCESS;
-}
-
grn_obj *
grn_ctx_pop(grn_ctx *ctx)
{
@@ -504,7 +355,7 @@ grn_expr_open(grn_ctx *ctx, grn_obj_spec *spec, const uint8_t *p, const uint8_t
{
grn_expr *expr = NULL;
if ((expr = GRN_MALLOCN(grn_expr, 1))) {
- int size = 256;
+ int size = GRN_STACK_SIZE;
expr->consts = NULL;
expr->nconsts = 0;
GRN_TEXT_INIT(&expr->name_buf, 0);
@@ -869,6 +720,21 @@ grn_expr_get_var_by_offset(grn_ctx *ctx, grn_obj *expr, unsigned int offset)
DFI_PUT(e, type, domain, code); \
} while (0)
+static void
+grn_expr_append_obj_resolve_const(grn_ctx *ctx, /* tries to convert obj in place to the to_domain type */
+ grn_obj *obj,
+ grn_id to_domain)
+{
+ grn_obj dest; /* scratch bulk that receives the converted value */
+
+ GRN_OBJ_INIT(&dest, GRN_BULK, 0, to_domain);
+ if (!grn_obj_cast(ctx, obj, &dest, GRN_FALSE)) { /* overwrite obj only when the cast returns 0 (presumably GRN_SUCCESS) */
+ grn_obj_reinit(ctx, obj, to_domain, 0);
+ grn_bulk_write(ctx, obj, GRN_BULK_HEAD(&dest), GRN_BULK_VSIZE(&dest)); /* copy the converted bytes back into obj */
+ }
+ GRN_OBJ_FIN(ctx, &dest); /* runs on both paths; when the cast returned non-zero, obj was left as it was */
+}
+
grn_obj *
grn_expr_append_obj(grn_ctx *ctx, grn_obj *expr, grn_obj *obj, grn_operator op, int nargs)
{
@@ -929,7 +795,8 @@ grn_expr_append_obj(grn_ctx *ctx, grn_obj *expr, grn_obj *obj, grn_operator op,
ERR(GRN_INVALID_ARGUMENT, "invalid function call expression");
goto exit;
}
- if (!function_proc_p(proc)) {
+ if (!(grn_obj_is_function_proc(ctx, proc) ||
+ grn_obj_is_scorer_proc(ctx, proc))) {
grn_obj buffer;
GRN_TEXT_INIT(&buffer, 0);
@@ -1000,26 +867,14 @@ grn_expr_append_obj(grn_ctx *ctx, grn_obj *expr, grn_obj *obj, grn_operator op,
if (CONSTP(y)) {
/* todo */
} else {
- grn_obj dest;
if (xd != yd) {
- GRN_OBJ_INIT(&dest, GRN_BULK, 0, yd);
- if (!grn_obj_cast(ctx, x, &dest, GRN_FALSE)) {
- grn_obj_reinit(ctx, x, yd, 0);
- grn_bulk_write(ctx, x, GRN_BULK_HEAD(&dest), GRN_BULK_VSIZE(&dest));
- }
- GRN_OBJ_FIN(ctx, &dest);
+ grn_expr_append_obj_resolve_const(ctx, x, yd);
}
}
} else {
if (CONSTP(y)) {
- grn_obj dest;
if (xd != yd) {
- GRN_OBJ_INIT(&dest, GRN_BULK, 0, xd);
- if (!grn_obj_cast(ctx, y, &dest, GRN_FALSE)) {
- grn_obj_reinit(ctx, y, xd, 0);
- grn_bulk_write(ctx, y, GRN_BULK_HEAD(&dest), GRN_BULK_VSIZE(&dest));
- }
- GRN_OBJ_FIN(ctx, &dest);
+ grn_expr_append_obj_resolve_const(ctx, y, xd);
}
}
}
@@ -1054,6 +909,7 @@ grn_expr_append_obj(grn_ctx *ctx, grn_obj *expr, grn_obj *obj, grn_operator op,
case GRN_OP_GET_REF :
case GRN_OP_ADJUST :
case GRN_OP_TERM_EXTRACT :
+ case GRN_OP_REGEXP :
PUSH_CODE(e, op, obj, nargs, code);
if (nargs) {
int i = nargs - 1;
@@ -1367,425 +1223,6 @@ grn_expr_compile(grn_ctx *ctx, grn_obj *expr)
s1 = sp[-2];\
} while (0)
-#define DO_COMPARE_SUB_NUMERIC(y,op) do {\
- switch ((y)->header.domain) {\
- case GRN_DB_INT8 :\
- r = (x_ op GRN_INT8_VALUE(y));\
- break;\
- case GRN_DB_UINT8 :\
- r = (x_ op GRN_UINT8_VALUE(y));\
- break;\
- case GRN_DB_INT16 :\
- r = (x_ op GRN_INT16_VALUE(y));\
- break;\
- case GRN_DB_UINT16 :\
- r = (x_ op GRN_UINT16_VALUE(y));\
- break;\
- case GRN_DB_INT32 :\
- r = (x_ op GRN_INT32_VALUE(y));\
- break;\
- case GRN_DB_UINT32 :\
- r = (x_ op GRN_UINT32_VALUE(y));\
- break;\
- case GRN_DB_INT64 :\
- r = (x_ op GRN_INT64_VALUE(y));\
- break;\
- case GRN_DB_TIME :\
- r = (GRN_TIME_PACK(x_,0) op GRN_INT64_VALUE(y));\
- break;\
- case GRN_DB_UINT64 :\
- r = (x_ op GRN_UINT64_VALUE(y));\
- break;\
- case GRN_DB_FLOAT :\
- r = (x_ op GRN_FLOAT_VALUE(y));\
- break;\
- default :\
- r = 0;\
- break;\
- }\
-} while (0)
-
-#define DO_COMPARE_SUB(op) do {\
- switch (y->header.domain) {\
- case GRN_DB_SHORT_TEXT :\
- case GRN_DB_TEXT :\
- case GRN_DB_LONG_TEXT :\
- {\
- grn_obj y_;\
- GRN_OBJ_INIT(&y_, GRN_BULK, 0, x->header.domain);\
- if (grn_obj_cast(ctx, y, &y_, GRN_FALSE)) {\
- r = 0;\
- } else {\
- DO_COMPARE_SUB_NUMERIC(&y_, op);\
- }\
- GRN_OBJ_FIN(ctx, &y_);\
- }\
- break;\
- default :\
- DO_COMPARE_SUB_NUMERIC(y,op);\
- break;\
- }\
-} while (0)
-
-#define DO_COMPARE_BUILTIN(x,y,r,op) do {\
- switch (x->header.domain) {\
- case GRN_DB_INT8 :\
- {\
- int8_t x_ = GRN_INT8_VALUE(x);\
- DO_COMPARE_SUB(op);\
- }\
- break;\
- case GRN_DB_UINT8 :\
- {\
- uint8_t x_ = GRN_UINT8_VALUE(x);\
- DO_COMPARE_SUB(op);\
- }\
- break;\
- case GRN_DB_INT16 :\
- {\
- int16_t x_ = GRN_INT16_VALUE(x);\
- DO_COMPARE_SUB(op);\
- }\
- break;\
- case GRN_DB_UINT16 :\
- {\
- uint16_t x_ = GRN_UINT16_VALUE(x);\
- DO_COMPARE_SUB(op);\
- }\
- break;\
- case GRN_DB_INT32 :\
- {\
- int32_t x_ = GRN_INT32_VALUE(x);\
- DO_COMPARE_SUB(op);\
- }\
- break;\
- case GRN_DB_UINT32 :\
- {\
- uint32_t x_ = GRN_UINT32_VALUE(x);\
- DO_COMPARE_SUB(op);\
- }\
- break;\
- case GRN_DB_TIME :\
- {\
- int64_t x_ = GRN_INT64_VALUE(x);\
- switch (y->header.domain) {\
- case GRN_DB_INT32 :\
- r = (x_ op GRN_TIME_PACK(GRN_INT32_VALUE(y), 0));\
- break;\
- case GRN_DB_UINT32 :\
- r = (x_ op GRN_TIME_PACK(GRN_UINT32_VALUE(y), 0));\
- break;\
- case GRN_DB_INT64 :\
- case GRN_DB_TIME :\
- r = (x_ op GRN_INT64_VALUE(y));\
- break;\
- case GRN_DB_UINT64 :\
- r = (x_ op GRN_UINT64_VALUE(y));\
- break;\
- case GRN_DB_FLOAT :\
- r = (x_ op GRN_TIME_PACK(GRN_FLOAT_VALUE(y), 0));\
- break;\
- case GRN_DB_SHORT_TEXT :\
- case GRN_DB_TEXT :\
- case GRN_DB_LONG_TEXT :\
- {\
- const char *p_ = GRN_TEXT_VALUE(y);\
- int i_ = grn_atoi(p_, p_ + GRN_TEXT_LEN(y), NULL);\
- r = (x_ op GRN_TIME_PACK(i_, 0));\
- }\
- break;\
- default :\
- r = 0;\
- break;\
- }\
- }\
- break;\
- case GRN_DB_INT64 :\
- {\
- int64_t x_ = GRN_INT64_VALUE(x);\
- DO_COMPARE_SUB(op);\
- }\
- break;\
- case GRN_DB_UINT64 :\
- {\
- uint64_t x_ = GRN_UINT64_VALUE(x);\
- DO_COMPARE_SUB(op);\
- }\
- break;\
- case GRN_DB_FLOAT :\
- {\
- double x_ = GRN_FLOAT_VALUE(x);\
- DO_COMPARE_SUB(op);\
- }\
- break;\
- case GRN_DB_SHORT_TEXT :\
- case GRN_DB_TEXT :\
- case GRN_DB_LONG_TEXT :\
- if (GRN_DB_SHORT_TEXT <= y->header.domain && y->header.domain <= GRN_DB_LONG_TEXT) {\
- int r_;\
- uint32_t la = GRN_TEXT_LEN(x), lb = GRN_TEXT_LEN(y);\
- if (la > lb) {\
- if (!(r_ = memcmp(GRN_TEXT_VALUE(x), GRN_TEXT_VALUE(y), lb))) {\
- r_ = 1;\
- }\
- } else {\
- if (!(r_ = memcmp(GRN_TEXT_VALUE(x), GRN_TEXT_VALUE(y), la))) {\
- r_ = la == lb ? 0 : -1;\
- }\
- }\
- r = (r_ op 0);\
- } else {\
- const char *q_ = GRN_TEXT_VALUE(x);\
- int x_ = grn_atoi(q_, q_ + GRN_TEXT_LEN(x), NULL);\
- DO_COMPARE_SUB(op);\
- }\
- break;\
- default :\
- r = 0;\
- break;\
- }\
-} while (0)
-
-#define DO_COMPARE(x, y, r, op) do {\
- if (x->header.domain >= GRN_N_RESERVED_TYPES) {\
- grn_obj *table;\
- table = grn_ctx_at(ctx, x->header.domain);\
- switch (table->header.type) {\
- case GRN_TABLE_HASH_KEY :\
- case GRN_TABLE_PAT_KEY :\
- {\
- grn_obj key;\
- int length;\
- GRN_OBJ_INIT(&key, GRN_BULK, 0, table->header.domain);\
- length = grn_table_get_key2(ctx, table, GRN_RECORD_VALUE(x), &key);\
- if (length > 0) {\
- grn_obj *x_original = x;\
- x = &key;\
- DO_COMPARE_BUILTIN((&key), y, r, op);\
- x = x_original;\
- } else {\
- r = 0;\
- }\
- GRN_OBJ_FIN(ctx, &key);\
- }\
- break;\
- default :\
- r = 0;\
- break;\
- }\
- grn_obj_unlink(ctx, table);\
- } else {\
- DO_COMPARE_BUILTIN(x, y, r, op);\
- }\
-} while (0)
-
-#define DO_EQ_SUB do {\
- switch (y->header.domain) {\
- case GRN_DB_INT8 :\
- r = (x_ == GRN_INT8_VALUE(y));\
- break;\
- case GRN_DB_UINT8 :\
- r = (x_ == GRN_UINT8_VALUE(y));\
- break;\
- case GRN_DB_INT16 :\
- r = (x_ == GRN_INT16_VALUE(y));\
- break;\
- case GRN_DB_UINT16 :\
- r = (x_ == GRN_UINT16_VALUE(y));\
- break;\
- case GRN_DB_INT32 :\
- r = (x_ == GRN_INT32_VALUE(y));\
- break;\
- case GRN_DB_UINT32 :\
- r = (x_ == GRN_UINT32_VALUE(y));\
- break;\
- case GRN_DB_INT64 :\
- r = (x_ == GRN_INT64_VALUE(y));\
- break;\
- case GRN_DB_TIME :\
- r = (GRN_TIME_PACK(x_,0) == GRN_INT64_VALUE(y));\
- break;\
- case GRN_DB_UINT64 :\
- r = (x_ == GRN_UINT64_VALUE(y));\
- break;\
- case GRN_DB_FLOAT :\
- r = ((x_ <= GRN_FLOAT_VALUE(y)) && (x_ >= GRN_FLOAT_VALUE(y)));\
- break;\
- case GRN_DB_SHORT_TEXT :\
- case GRN_DB_TEXT :\
- case GRN_DB_LONG_TEXT :\
- {\
- const char *p_ = GRN_TEXT_VALUE(y);\
- int i_ = grn_atoi(p_, p_ + GRN_TEXT_LEN(y), NULL);\
- r = (x_ == i_);\
- }\
- break;\
- default :\
- r = 0;\
- break;\
- }\
-} while (0)
-
-#define DO_EQ(x,y,r) do {\
- switch (x->header.domain) {\
- case GRN_DB_VOID :\
- r = 0;\
- break;\
- case GRN_DB_INT8 :\
- {\
- int8_t x_ = GRN_INT8_VALUE(x);\
- DO_EQ_SUB;\
- }\
- break;\
- case GRN_DB_UINT8 :\
- {\
- uint8_t x_ = GRN_UINT8_VALUE(x);\
- DO_EQ_SUB;\
- }\
- break;\
- case GRN_DB_INT16 :\
- {\
- int16_t x_ = GRN_INT16_VALUE(x);\
- DO_EQ_SUB;\
- }\
- break;\
- case GRN_DB_UINT16 :\
- {\
- uint16_t x_ = GRN_UINT16_VALUE(x);\
- DO_EQ_SUB;\
- }\
- break;\
- case GRN_DB_INT32 :\
- {\
- int32_t x_ = GRN_INT32_VALUE(x);\
- DO_EQ_SUB;\
- }\
- break;\
- case GRN_DB_UINT32 :\
- {\
- uint32_t x_ = GRN_UINT32_VALUE(x);\
- DO_EQ_SUB;\
- }\
- break;\
- case GRN_DB_INT64 :\
- {\
- int64_t x_ = GRN_INT64_VALUE(x);\
- DO_EQ_SUB;\
- }\
- break;\
- case GRN_DB_TIME :\
- {\
- int64_t x_ = GRN_INT64_VALUE(x);\
- switch (y->header.domain) {\
- case GRN_DB_INT32 :\
- r = (x_ == GRN_TIME_PACK(GRN_INT32_VALUE(y), 0));\
- break;\
- case GRN_DB_UINT32 :\
- r = (x_ == GRN_TIME_PACK(GRN_UINT32_VALUE(y), 0));\
- break;\
- case GRN_DB_INT64 :\
- case GRN_DB_TIME :\
- r = (x_ == GRN_INT64_VALUE(y));\
- break;\
- case GRN_DB_UINT64 :\
- r = (x_ == GRN_UINT64_VALUE(y));\
- break;\
- case GRN_DB_FLOAT :\
- r = (x_ == GRN_TIME_PACK(GRN_FLOAT_VALUE(y), 0));\
- break;\
- case GRN_DB_SHORT_TEXT :\
- case GRN_DB_TEXT :\
- case GRN_DB_LONG_TEXT :\
- {\
- const char *p_ = GRN_TEXT_VALUE(y);\
- int i_ = grn_atoi(p_, p_ + GRN_TEXT_LEN(y), NULL);\
- r = (x_ == GRN_TIME_PACK(i_, 0));\
- }\
- break;\
- default :\
- r = 0;\
- break;\
- }\
- }\
- break;\
- case GRN_DB_UINT64 :\
- {\
- uint64_t x_ = GRN_UINT64_VALUE(x);\
- DO_EQ_SUB;\
- }\
- break;\
- case GRN_DB_FLOAT :\
- {\
- double x_ = GRN_FLOAT_VALUE(x);\
- switch (y->header.domain) {\
- case GRN_DB_INT32 :\
- r = ((x_ <= GRN_INT32_VALUE(y)) && (x_ >= GRN_INT32_VALUE(y)));\
- break;\
- case GRN_DB_UINT32 :\
- r = ((x_ <= GRN_UINT32_VALUE(y)) && (x_ >= GRN_UINT32_VALUE(y)));\
- break;\
- case GRN_DB_INT64 :\
- case GRN_DB_TIME :\
- r = ((x_ <= GRN_INT64_VALUE(y)) && (x_ >= GRN_INT64_VALUE(y)));\
- break;\
- case GRN_DB_UINT64 :\
- r = ((x_ <= GRN_UINT64_VALUE(y)) && (x_ >= GRN_UINT64_VALUE(y)));\
- break;\
- case GRN_DB_FLOAT :\
- r = ((x_ <= GRN_FLOAT_VALUE(y)) && (x_ >= GRN_FLOAT_VALUE(y)));\
- break;\
- case GRN_DB_SHORT_TEXT :\
- case GRN_DB_TEXT :\
- case GRN_DB_LONG_TEXT :\
- {\
- const char *p_ = GRN_TEXT_VALUE(y);\
- int i_ = grn_atoi(p_, p_ + GRN_TEXT_LEN(y), NULL);\
- r = (x_ <= i_ && x_ >= i_);\
- }\
- break;\
- default :\
- r = 0;\
- break;\
- }\
- }\
- break;\
- case GRN_DB_SHORT_TEXT :\
- case GRN_DB_TEXT :\
- case GRN_DB_LONG_TEXT :\
- if (GRN_DB_SHORT_TEXT <= y->header.domain && y->header.domain <= GRN_DB_LONG_TEXT) {\
- uint32_t la = GRN_TEXT_LEN(x), lb = GRN_TEXT_LEN(y);\
- r = (la == lb && !memcmp(GRN_TEXT_VALUE(x), GRN_TEXT_VALUE(y), lb));\
- } else {\
- const char *q_ = GRN_TEXT_VALUE(x);\
- int x_ = grn_atoi(q_, q_ + GRN_TEXT_LEN(x), NULL);\
- DO_EQ_SUB;\
- }\
- break;\
- default :\
- if ((x->header.domain == y->header.domain)) {\
- r = (GRN_BULK_VSIZE(x) == GRN_BULK_VSIZE(y) &&\
- !(memcmp(GRN_BULK_HEAD(x), GRN_BULK_HEAD(y), GRN_BULK_VSIZE(x))));\
- } else {\
- grn_obj dest;\
- if (x->header.domain < y->header.domain) {\
- GRN_OBJ_INIT(&dest, GRN_BULK, 0, y->header.domain);\
- if (!grn_obj_cast(ctx, x, &dest, GRN_FALSE)) {\
- r = (GRN_BULK_VSIZE(&dest) == GRN_BULK_VSIZE(y) &&\
- !memcmp(GRN_BULK_HEAD(&dest), GRN_BULK_HEAD(y), GRN_BULK_VSIZE(y))); \
- }\
- } else {\
- GRN_OBJ_INIT(&dest, GRN_BULK, 0, x->header.domain);\
- if (!grn_obj_cast(ctx, y, &dest, GRN_FALSE)) {\
- r = (GRN_BULK_VSIZE(&dest) == GRN_BULK_VSIZE(x) &&\
- !memcmp(GRN_BULK_HEAD(&dest), GRN_BULK_HEAD(x), GRN_BULK_VSIZE(x))); \
- }\
- }\
- GRN_OBJ_FIN(ctx, &dest);\
- }\
- break;\
- }\
-} while (0)
-
#define GEO_RESOLUTION 3600000
#define GEO_RADIOUS 6357303
#define GEO_BES_C1 6334834
@@ -2230,7 +1667,8 @@ grn_proc_call(grn_ctx *ctx, grn_obj *proc, int nargs, grn_obj *caller)
code++; \
} while (0)
-#define ARITHMETIC_BINARY_OPERATION_DISPATCH(integer8_operation, \
+#define ARITHMETIC_BINARY_OPERATION_DISPATCH(operator, \
+ integer8_operation, \
integer16_operation, \
integer32_operation, \
integer64_operation, \
@@ -2242,6 +1680,23 @@ grn_proc_call(grn_ctx *ctx, grn_obj *proc, int nargs, grn_obj *caller)
grn_obj *x, *y; \
\
POP2ALLOC1(x, y, res); \
+ if (x->header.type == GRN_VECTOR || y->header.type == GRN_VECTOR) { \
+ grn_obj inspected_x; \
+ grn_obj inspected_y; \
+ GRN_TEXT_INIT(&inspected_x, 0); \
+ GRN_TEXT_INIT(&inspected_y, 0); \
+ grn_inspect(ctx, &inspected_x, x); \
+ grn_inspect(ctx, &inspected_y, y); \
+ ERR(GRN_INVALID_ARGUMENT, \
+ "<%s> doesn't support vector: <%.*s> %s <%.*s>", \
+ operator, \
+ (int)GRN_TEXT_LEN(&inspected_x), GRN_TEXT_VALUE(&inspected_x), \
+ operator, \
+ (int)GRN_TEXT_LEN(&inspected_y), GRN_TEXT_VALUE(&inspected_y)); \
+ GRN_OBJ_FIN(ctx, &inspected_x); \
+ GRN_OBJ_FIN(ctx, &inspected_y); \
+ goto exit; \
+ } \
if (y != res) { \
res->header.domain = x->header.domain; \
} \
@@ -2667,7 +2122,9 @@ grn_proc_call(grn_ctx *ctx, grn_obj *proc, int nargs, grn_obj *caller)
} \
if (GRN_BULK_VSIZE(var) != (sizeof(grn_obj *) + sizeof(grn_id))) { \
ERR(GRN_INVALID_ARGUMENT, \
- "invalid variable size: expected: %zu, actual: %zu", \
+ "invalid variable size: " \
+ "expected: %" GRN_FMT_SIZE ", " /* keep the ", " the old "%zu, actual" message had */ \
+ "actual: %" GRN_FMT_SIZE, \
(sizeof(grn_obj *) + sizeof(grn_id)), GRN_BULK_VSIZE(var)); \
goto exit; \
} \
@@ -2768,52 +2225,6 @@ grn_proc_call(grn_ctx *ctx, grn_obj *proc, int nargs, grn_obj *caller)
} \
} while (0)
-static grn_bool
-pseudo_query_scan(grn_ctx *ctx, grn_obj *x, grn_obj *y)
-{
- grn_obj *normalizer;
- grn_obj *a = NULL, *b = NULL;
- grn_bool matched = GRN_FALSE;
-
- normalizer = grn_ctx_get(ctx, GRN_NORMALIZER_AUTO_NAME, -1);
- switch (x->header.domain) {
- case GRN_DB_SHORT_TEXT:
- case GRN_DB_TEXT:
- case GRN_DB_LONG_TEXT:
- a = grn_string_open(ctx, GRN_TEXT_VALUE(x), GRN_TEXT_LEN(x),
- normalizer, 0);
- break;
- default:
- break;
- }
-
- switch (y->header.domain) {
- case GRN_DB_SHORT_TEXT:
- case GRN_DB_TEXT:
- case GRN_DB_LONG_TEXT:
- b = grn_string_open(ctx, GRN_TEXT_VALUE(y), GRN_TEXT_LEN(y),
- normalizer, 0);
- break;
- default:
- break;
- }
-
- /* normalized str doesn't contain '\0'. */
- if (a && b) {
- const char *a_norm, *b_norm;
- grn_string_get_normalized(ctx, a, &a_norm, NULL, NULL);
- grn_string_get_normalized(ctx, b, &b_norm, NULL, NULL);
- matched = (strstr(a_norm, b_norm) != NULL);
- }
-
- if (a) { grn_obj_close(ctx, a); }
- if (b) { grn_obj_close(ctx, b); }
-
- if (normalizer) { grn_obj_unlink(ctx, normalizer); }
-
- return matched;
-}
-
inline static void
grn_expr_exec_get_member(grn_ctx *ctx,
grn_obj *expr,
@@ -2831,10 +2242,10 @@ grn_expr_exec_get_member(grn_ctx *ctx,
GRN_TEXT_INIT(&values, 0);
grn_obj_get_value(ctx, column, record_id, &values);
- grn_obj_reinit(ctx, result, DB_OBJ(column)->range, 0);
i = GRN_UINT32_VALUE(index);
if (values.header.type == GRN_UVECTOR) {
int n_elements;
+ grn_obj_reinit(ctx, result, DB_OBJ(column)->range, 0);
n_elements = GRN_BULK_VSIZE(&values) / sizeof(grn_id);
if (n_elements > i) {
grn_id value;
@@ -2843,25 +2254,70 @@ grn_expr_exec_get_member(grn_ctx *ctx,
}
} else {
if (values.u.v.n_sections > i) {
- grn_section *section = &(values.u.v.sections[i]);
- grn_obj *body = values.u.v.body;
- const char *value;
- value = GRN_BULK_HEAD(body) + section->offset;
- grn_bulk_write(ctx, result, value, section->length);
+ const char *content;
+ unsigned int content_length;
+ grn_id domain;
+
+ content_length = grn_vector_get_element(ctx, &values, i,
+ &content, NULL, &domain);
+ grn_obj_reinit(ctx, result, domain, 0);
+ grn_bulk_write(ctx, result, content, content_length);
}
}
GRN_OBJ_FIN(ctx, &values);
}
+static inline grn_bool
+grn_expr_exec_is_simple_expr(grn_ctx *ctx, grn_obj *expr) /* GRN_TRUE only for a GRN_EXPR made of exactly one GRN_OP_PUSH code */
+{
+ grn_expr *e = (grn_expr *)expr; /* only dereferenced after the type check below */
+
+ if (expr->header.type != GRN_EXPR) { /* other object types are never "simple" */
+ return GRN_FALSE;
+ }
+
+ if (e->codes_curr != 1) { /* must hold exactly one code */
+ return GRN_FALSE;
+ }
+
+ switch (e->codes[0].op) {
+ case GRN_OP_PUSH : /* the single code is a push */
+ return GRN_TRUE;
+ default : /* any other single operator is not handled as simple */
+ return GRN_FALSE;
+ }
+}
+
+static inline grn_obj *
+grn_expr_exec_simple(grn_ctx *ctx, grn_obj *expr) /* returns the value attached to the expression's first code */
+{
+ grn_expr *e = (grn_expr *)expr; /* no type or length check is made in this function */
+
+ return e->codes[0].value; /* the stored pointer is returned as is */
+}
+
grn_obj *
grn_expr_exec(grn_ctx *ctx, grn_obj *expr, int nargs)
{
grn_obj *val = NULL;
uint32_t stack_curr = ctx->impl->stack_curr;
GRN_API_ENTER;
+ if (grn_expr_exec_is_simple_expr(ctx, expr)) {
+ val = grn_expr_exec_simple(ctx, expr);
+ GRN_API_RETURN(val);
+ }
if (expr->header.type == GRN_PROC) {
- grn_proc_call(ctx, expr, nargs, expr);
+ grn_proc *proc = (grn_proc *)expr;
+ if (proc->type == GRN_PROC_COMMAND) {
+ grn_command_input *input;
+ input = grn_command_input_open(ctx, expr);
+ grn_command_run(ctx, expr, input);
+ grn_command_input_close(ctx, input);
+ GRN_API_RETURN(NULL);
+ } else {
+ grn_proc_call(ctx, expr, nargs, expr);
+ }
} else {
grn_expr *e = (grn_expr *)expr;
register grn_obj **s_ = ctx->impl->stack, *s0 = NULL, *s1 = NULL, **sp, *vp = e->values;
@@ -3218,12 +2674,7 @@ grn_expr_exec(grn_ctx *ctx, grn_obj *expr, int nargs)
case GRN_OP_GET_VALUE :
{
grn_obj *col, *rec;
- grn_obj pat_value;
- GRN_TEXT_INIT(&pat_value, 0);
do {
- uint32_t size;
- const char *value;
- grn_bool is_pat_key_accessor = GRN_FALSE;
if (code->nargs == 1) {
rec = v0;
if (code->value) {
@@ -3245,39 +2696,14 @@ grn_expr_exec(grn_ctx *ctx, grn_obj *expr, int nargs)
col = grn_obj_column(ctx, table, GRN_BULK_HEAD(col), GRN_BULK_VSIZE(col));
if (col) { grn_expr_take_obj(ctx, (grn_obj *)expr, col); }
}
- if (col) {
- grn_obj_reinit_for(ctx, res, col);
- if (col->header.type == GRN_ACCESSOR &&
- ((grn_accessor *)col)->action == GRN_ACCESSOR_GET_KEY &&
- ((grn_accessor *)col)->obj->header.type == GRN_TABLE_PAT_KEY) {
- is_pat_key_accessor = GRN_TRUE;
- GRN_BULK_REWIND(&pat_value);
- grn_obj_get_value(ctx, col, GRN_RECORD_VALUE(rec), &pat_value);
- value = GRN_BULK_HEAD(&pat_value);
- size = GRN_BULK_VSIZE(&pat_value);
- } else {
- value = grn_obj_get_value_(ctx, col, GRN_RECORD_VALUE(rec),
- &size);
- }
- } else {
+ if (!col) {
ERR(GRN_INVALID_ARGUMENT, "col resolve failed");
- GRN_OBJ_FIN(ctx, &pat_value);
goto exit;
}
- if (!is_pat_key_accessor && size == GRN_OBJ_GET_VALUE_IMD) {
- GRN_RECORD_SET(ctx, res, (uintptr_t)value);
- } else {
- if (value) {
- if (res->header.type == GRN_VECTOR) {
- grn_vector_decode(ctx, res, value, size);
- } else {
- grn_bulk_write_from(ctx, res, value, 0, size);
- }
- }
- }
+ grn_obj_reinit_for(ctx, res, col);
+ grn_obj_get_value(ctx, col, GRN_RECORD_VALUE(rec), res);
code++;
} while (code < ce && code->op == GRN_OP_GET_VALUE);
- GRN_OBJ_FIN(ctx, &pat_value);
}
break;
case GRN_OP_OBJ_SEARCH :
@@ -3511,7 +2937,7 @@ grn_expr_exec(grn_ctx *ctx, grn_obj *expr, int nargs)
POP1(y);
POP1(x);
WITH_SPSAVE({
- matched = pseudo_query_scan(ctx, x, y);
+ matched = grn_operator_exec_match(ctx, x, y);
});
ALLOC1(res);
grn_obj_reinit(ctx, res, GRN_DB_INT32, 0);
@@ -3521,35 +2947,38 @@ grn_expr_exec(grn_ctx *ctx, grn_obj *expr, int nargs)
break;
case GRN_OP_EQUAL :
{
- int r = GRN_FALSE;
+ grn_bool is_equal;
grn_obj *x, *y;
POP2ALLOC1(x, y, res);
- DO_EQ(x, y, r);
+ is_equal = grn_operator_exec_equal(ctx, x, y);
grn_obj_reinit(ctx, res, GRN_DB_INT32, 0);
- GRN_INT32_SET(ctx, res, r);
+ GRN_INT32_SET(ctx, res, is_equal ? 1 : 0);
}
code++;
break;
case GRN_OP_NOT_EQUAL :
{
- int r = GRN_FALSE;
+ grn_bool is_not_equal;
grn_obj *x, *y;
POP2ALLOC1(x, y, res);
- DO_EQ(x, y, r);
+ is_not_equal = grn_operator_exec_not_equal(ctx, x, y);
grn_obj_reinit(ctx, res, GRN_DB_INT32, 0);
- GRN_INT32_SET(ctx, res, 1 - r);
+ GRN_INT32_SET(ctx, res, is_not_equal ? 1 : 0);
}
code++;
break;
case GRN_OP_PREFIX :
{
grn_obj *x, *y;
- POP2ALLOC1(x, y, res);
- GRN_INT32_SET(ctx, res,
- (GRN_TEXT_LEN(x) >= GRN_TEXT_LEN(y) &&
- !memcmp(GRN_TEXT_VALUE(x), GRN_TEXT_VALUE(y), GRN_TEXT_LEN(y))));
- res->header.type = GRN_BULK;
- res->header.domain = GRN_DB_INT32;
+ grn_bool matched;
+ POP1(y);
+ POP1(x);
+ WITH_SPSAVE({
+ matched = grn_operator_exec_prefix(ctx, x, y);
+ });
+ ALLOC1(res);
+ grn_obj_reinit(ctx, res, GRN_DB_INT32, 0);
+ GRN_INT32_SET(ctx, res, matched ? 1 : 0);
}
code++;
break;
@@ -3568,11 +2997,11 @@ grn_expr_exec(grn_ctx *ctx, grn_obj *expr, int nargs)
break;
case GRN_OP_LESS :
{
- int r;
+ grn_bool r;
grn_obj *x, *y;
POP2ALLOC1(x, y, res);
- DO_COMPARE(x, y, r, <);
- GRN_INT32_SET(ctx, res, r);
+ r = grn_operator_exec_less(ctx, x, y);
+ GRN_INT32_SET(ctx, res, r ? 1 : 0);
res->header.type = GRN_BULK;
res->header.domain = GRN_DB_INT32;
}
@@ -3580,11 +3009,11 @@ grn_expr_exec(grn_ctx *ctx, grn_obj *expr, int nargs)
break;
case GRN_OP_GREATER :
{
- int r;
+ grn_bool r;
grn_obj *x, *y;
POP2ALLOC1(x, y, res);
- DO_COMPARE(x, y, r, >);
- GRN_INT32_SET(ctx, res, r);
+ r = grn_operator_exec_greater(ctx, x, y);
+ GRN_INT32_SET(ctx, res, r ? 1 : 0);
res->header.type = GRN_BULK;
res->header.domain = GRN_DB_INT32;
}
@@ -3592,11 +3021,11 @@ grn_expr_exec(grn_ctx *ctx, grn_obj *expr, int nargs)
break;
case GRN_OP_LESS_EQUAL :
{
- int r;
+ grn_bool r;
grn_obj *x, *y;
POP2ALLOC1(x, y, res);
- DO_COMPARE(x, y, r, <=);
- GRN_INT32_SET(ctx, res, r);
+ r = grn_operator_exec_less_equal(ctx, x, y);
+ GRN_INT32_SET(ctx, res, r ? 1 : 0);
res->header.type = GRN_BULK;
res->header.domain = GRN_DB_INT32;
}
@@ -3604,11 +3033,11 @@ grn_expr_exec(grn_ctx *ctx, grn_obj *expr, int nargs)
break;
case GRN_OP_GREATER_EQUAL :
{
- int r;
+ grn_bool r;
grn_obj *x, *y;
POP2ALLOC1(x, y, res);
- DO_COMPARE(x, y, r, >=);
- GRN_INT32_SET(ctx, res, r);
+ r = grn_operator_exec_greater_equal(ctx, x, y);
+ GRN_INT32_SET(ctx, res, r ? 1 : 0);
res->header.type = GRN_BULK;
res->header.domain = GRN_DB_INT32;
}
@@ -3799,6 +3228,7 @@ grn_expr_exec(grn_ctx *ctx, grn_obj *expr, int nargs)
break;
case GRN_OP_PLUS :
ARITHMETIC_BINARY_OPERATION_DISPATCH(
+ "+",
INTEGER_ARITHMETIC_OPERATION_PLUS,
INTEGER_ARITHMETIC_OPERATION_PLUS,
INTEGER_ARITHMETIC_OPERATION_PLUS,
@@ -3847,6 +3277,7 @@ grn_expr_exec(grn_ctx *ctx, grn_obj *expr, int nargs)
,);
} else {
ARITHMETIC_BINARY_OPERATION_DISPATCH(
+ "-",
INTEGER_ARITHMETIC_OPERATION_MINUS,
INTEGER_ARITHMETIC_OPERATION_MINUS,
INTEGER_ARITHMETIC_OPERATION_MINUS,
@@ -3865,6 +3296,7 @@ grn_expr_exec(grn_ctx *ctx, grn_obj *expr, int nargs)
break;
case GRN_OP_STAR :
ARITHMETIC_BINARY_OPERATION_DISPATCH(
+ "*",
INTEGER_ARITHMETIC_OPERATION_STAR,
INTEGER_ARITHMETIC_OPERATION_STAR,
INTEGER_ARITHMETIC_OPERATION_STAR,
@@ -3914,6 +3346,7 @@ grn_expr_exec(grn_ctx *ctx, grn_obj *expr, int nargs)
break;
case GRN_OP_BITWISE_OR :
ARITHMETIC_BINARY_OPERATION_DISPATCH(
+ "|",
INTEGER_ARITHMETIC_OPERATION_BITWISE_OR,
INTEGER_ARITHMETIC_OPERATION_BITWISE_OR,
INTEGER_ARITHMETIC_OPERATION_BITWISE_OR,
@@ -3925,6 +3358,7 @@ grn_expr_exec(grn_ctx *ctx, grn_obj *expr, int nargs)
break;
case GRN_OP_BITWISE_XOR :
ARITHMETIC_BINARY_OPERATION_DISPATCH(
+ "^",
INTEGER_ARITHMETIC_OPERATION_BITWISE_XOR,
INTEGER_ARITHMETIC_OPERATION_BITWISE_XOR,
INTEGER_ARITHMETIC_OPERATION_BITWISE_XOR,
@@ -3936,6 +3370,7 @@ grn_expr_exec(grn_ctx *ctx, grn_obj *expr, int nargs)
break;
case GRN_OP_BITWISE_AND :
ARITHMETIC_BINARY_OPERATION_DISPATCH(
+ "&",
INTEGER_ARITHMETIC_OPERATION_BITWISE_AND,
INTEGER_ARITHMETIC_OPERATION_BITWISE_AND,
INTEGER_ARITHMETIC_OPERATION_BITWISE_AND,
@@ -3947,6 +3382,7 @@ grn_expr_exec(grn_ctx *ctx, grn_obj *expr, int nargs)
break;
case GRN_OP_SHIFTL :
ARITHMETIC_BINARY_OPERATION_DISPATCH(
+ "<<",
INTEGER_ARITHMETIC_OPERATION_SHIFTL,
INTEGER_ARITHMETIC_OPERATION_SHIFTL,
INTEGER_ARITHMETIC_OPERATION_SHIFTL,
@@ -3958,6 +3394,7 @@ grn_expr_exec(grn_ctx *ctx, grn_obj *expr, int nargs)
break;
case GRN_OP_SHIFTR :
ARITHMETIC_BINARY_OPERATION_DISPATCH(
+ ">>",
INTEGER_ARITHMETIC_OPERATION_SHIFTR,
INTEGER_ARITHMETIC_OPERATION_SHIFTR,
INTEGER_ARITHMETIC_OPERATION_SHIFTR,
@@ -3969,6 +3406,7 @@ grn_expr_exec(grn_ctx *ctx, grn_obj *expr, int nargs)
break;
case GRN_OP_SHIFTRR :
ARITHMETIC_BINARY_OPERATION_DISPATCH(
+ ">>>",
INTEGER8_ARITHMETIC_OPERATION_SHIFTRR,
INTEGER16_ARITHMETIC_OPERATION_SHIFTRR,
INTEGER32_ARITHMETIC_OPERATION_SHIFTRR,
@@ -4010,10 +3448,11 @@ grn_expr_exec(grn_ctx *ctx, grn_obj *expr, int nargs)
case GRN_OP_NOT :
{
grn_obj *value;
+ grn_bool value_boolean;
POP1ALLOC1(value, res);
+ GRN_TRUEP(ctx, value, value_boolean);
grn_obj_reinit(ctx, res, GRN_DB_BOOL, 0);
- grn_obj_cast(ctx, value, res, GRN_FALSE);
- GRN_BOOL_SET(ctx, res, !GRN_BOOL_VALUE(res));
+ GRN_BOOL_SET(ctx, res, !value_boolean);
}
code++;
break;
@@ -4025,6 +3464,21 @@ grn_expr_exec(grn_ctx *ctx, grn_obj *expr, int nargs)
code++;
}
break;
+ case GRN_OP_REGEXP :
+ {
+ grn_obj *target, *pattern;
+ grn_bool matched;
+ POP1(pattern);
+ POP1(target);
+ WITH_SPSAVE({
+ matched = grn_operator_exec_regexp(ctx, target, pattern);
+ });
+ ALLOC1(res);
+ grn_obj_reinit(ctx, res, GRN_DB_BOOL, 0);
+ GRN_BOOL_SET(ctx, res, matched);
+ }
+ code++;
+ break;
default :
ERR(GRN_FUNCTION_NOT_IMPLEMENTED, "not implemented operator assigned");
goto exit;
@@ -4066,6 +3520,8 @@ grn_expr_get_value(grn_ctx *ctx, grn_obj *expr, int offset)
#define DEFAULT_TERM_EXTRACT_POLICY 0
#define DEFAULT_WEIGHT_VECTOR_SIZE 4096
+#define GRN_SCAN_INFO_MAX_N_ARGS 128
+
struct _grn_scan_info {
uint32_t start;
uint32_t end;
@@ -4076,14 +3532,20 @@ struct _grn_scan_info {
grn_obj wv;
grn_obj index;
grn_obj *query;
- grn_obj *args[8];
+ grn_obj *args[GRN_SCAN_INFO_MAX_N_ARGS];
int max_interval;
int similarity_threshold;
+ grn_obj scorers;
+ grn_obj scorer_args_exprs;
+ grn_obj scorer_args_expr_offsets;
};
#define SI_FREE(si) do {\
GRN_OBJ_FIN(ctx, &(si)->wv);\
GRN_OBJ_FIN(ctx, &(si)->index);\
+ GRN_OBJ_FIN(ctx, &(si)->scorers);\
+ GRN_OBJ_FIN(ctx, &(si)->scorer_args_exprs);\
+ GRN_OBJ_FIN(ctx, &(si)->scorer_args_expr_offsets);\
GRN_FREE(si);\
} while (0)
@@ -4102,6 +3564,9 @@ struct _grn_scan_info {
(si)->max_interval = DEFAULT_MAX_INTERVAL;\
(si)->similarity_threshold = DEFAULT_SIMILARITY_THRESHOLD;\
(si)->start = (st);\
+ GRN_PTR_INIT(&(si)->scorers, GRN_OBJ_VECTOR, GRN_ID_NIL);\
+ GRN_PTR_INIT(&(si)->scorer_args_exprs, GRN_OBJ_VECTOR, GRN_ID_NIL);\
+ GRN_UINT32_INIT(&(si)->scorer_args_expr_offsets, GRN_OBJ_VECTOR);\
} while (0)
static scan_info **
@@ -4145,9 +3610,9 @@ put_logical_op(grn_ctx *ctx, scan_info **sis, int *ip, grn_operator op, int star
} else {
s_->flags &= ~SCAN_PUSH;
s_->logical_op = op;
- memcpy(&sis[i], &sis[j], sizeof(scan_info *) * (r - j));
- memmove(&sis[j], &sis[r], sizeof(scan_info *) * (i - r));
- memcpy(&sis[i + j - r], &sis[i], sizeof(scan_info *) * (r - j));
+ grn_memcpy(&sis[i], &sis[j], sizeof(scan_info *) * (r - j));
+ grn_memmove(&sis[j], &sis[r], sizeof(scan_info *) * (i - r));
+ grn_memcpy(&sis[i + j - r], &sis[i], sizeof(scan_info *) * (r - j));
}
break;
}
@@ -4168,22 +3633,177 @@ put_logical_op(grn_ctx *ctx, scan_info **sis, int *ip, grn_operator op, int star
return sis;
}
+/* TODO: Remove me if nobody wants to reuse the implementation again. */
+#if 0
+static const char *opstrs[] = {
+ "PUSH",
+ "POP",
+ "NOP",
+ "CALL",
+ "INTERN",
+ "GET_REF",
+ "GET_VALUE",
+ "AND",
+ "AND_NOT",
+ "OR",
+ "ASSIGN",
+ "STAR_ASSIGN",
+ "SLASH_ASSIGN",
+ "MOD_ASSIGN",
+ "PLUS_ASSIGN",
+ "MINUS_ASSIGN",
+ "SHIFTL_ASSIGN",
+ "SHIFTR_ASSIGN",
+ "SHIFTRR_ASSIGN",
+ "AND_ASSIGN",
+ "XOR_ASSIGN",
+ "OR_ASSIGN",
+ "JUMP",
+ "CJUMP",
+ "COMMA",
+ "BITWISE_OR",
+ "BITWISE_XOR",
+ "BITWISE_AND",
+ "BITWISE_NOT",
+ "EQUAL",
+ "NOT_EQUAL",
+ "LESS",
+ "GREATER",
+ "LESS_EQUAL",
+ "GREATER_EQUAL",
+ "IN",
+ "MATCH",
+ "NEAR",
+ "NEAR2",
+ "SIMILAR",
+ "TERM_EXTRACT",
+ "SHIFTL",
+ "SHIFTR",
+ "SHIFTRR",
+ "PLUS",
+ "MINUS",
+ "STAR",
+ "SLASH",
+ "MOD",
+ "DELETE",
+ "INCR",
+ "DECR",
+ "INCR_POST",
+ "DECR_POST",
+ "NOT",
+ "ADJUST",
+ "EXACT",
+ "LCP",
+ "PARTIAL",
+ "UNSPLIT",
+ "PREFIX",
+ "SUFFIX",
+ "GEO_DISTANCE1",
+ "GEO_DISTANCE2",
+ "GEO_DISTANCE3",
+ "GEO_DISTANCE4",
+ "GEO_WITHINP5",
+ "GEO_WITHINP6",
+ "GEO_WITHINP8",
+ "OBJ_SEARCH",
+ "EXPR_GET_VAR",
+ "TABLE_CREATE",
+ "TABLE_SELECT",
+ "TABLE_SORT",
+ "TABLE_GROUP",
+ "JSON_PUT"
+};
+
+/*
+ * Appends a textual form of obj to buf: the name reported by
+ * grn_column_name() when that call returns a non-zero length, otherwise
+ * the rendering produced by grn_text_otoj().
+ */
+static void
+put_value(grn_ctx *ctx, grn_obj *buf, grn_obj *obj)
+{
+  char name[GRN_TABLE_MAX_KEY_SIZE];
+  int name_size;
+
+  name_size = grn_column_name(ctx, obj, name, GRN_TABLE_MAX_KEY_SIZE);
+  if (name_size == 0) {
+    grn_text_otoj(ctx, buf, obj, NULL);
+    return;
+  }
+  GRN_TEXT_PUT(ctx, buf, name, name_size);
+}
+
+/*
+ * Appends a compact dump of expr to buf in the form
+ *   noname(<var>:<value>,...){<code>,<code>,...}
+ *
+ * Each code is written as its `modify` value followed by either the pushed
+ * operand (for GRN_OP_PUSH; "?<name>" or "?<position>" when the operand is
+ * one of the expression's own variables) or the optional operand, a space
+ * and the operator name taken from opstrs[].
+ *
+ * opstrs[] does not cover every operator (it ends at "JSON_PUT"), so an
+ * operator outside the table is written as "UNKNOWN_OP(<number>)" instead
+ * of indexing past the end of the array.
+ *
+ * Always returns GRN_SUCCESS.
+ */
+static grn_rc
+grn_expr_inspect_internal(grn_ctx *ctx, grn_obj *buf, grn_obj *expr)
+{
+  uint32_t i, j;
+  grn_expr_var *var;
+  grn_expr_code *code;
+  grn_expr *e = (grn_expr *)expr;
+  grn_hash *vars = grn_expr_get_vars(ctx, expr, &i);
+  const size_t n_opstrs = sizeof(opstrs) / sizeof(opstrs[0]);
+
+  GRN_TEXT_PUTS(ctx, buf, "noname");
+  GRN_TEXT_PUTC(ctx, buf, '(');
+  {
+    /* Counts variables already written so that ',' is only a separator. */
+    int n_written = 0;
+    grn_obj *value;
+    const char *name;
+    uint32_t name_len;
+    GRN_HASH_EACH(ctx, vars, id, &name, &name_len, &value, {
+      if (n_written++) { GRN_TEXT_PUTC(ctx, buf, ','); }
+      GRN_TEXT_PUT(ctx, buf, name, name_len);
+      GRN_TEXT_PUTC(ctx, buf, ':');
+      put_value(ctx, buf, value);
+    });
+  }
+  GRN_TEXT_PUTC(ctx, buf, ')');
+  GRN_TEXT_PUTC(ctx, buf, '{');
+  for (j = 0, code = e->codes; j < e->codes_curr; j++, code++) {
+    if (j) { GRN_TEXT_PUTC(ctx, buf, ','); }
+    grn_text_itoa(ctx, buf, code->modify);
+    if (code->op == GRN_OP_PUSH) {
+      /* A pushed operand that is one of the expression's variables is
+         shown as "?name" (or "?position" when the variable is unnamed). */
+      for (i = 0, var = e->vars; i < e->nvars; i++, var++) {
+        if (&var->value == code->value) {
+          GRN_TEXT_PUTC(ctx, buf, '?');
+          if (var->name_size) {
+            GRN_TEXT_PUT(ctx, buf, var->name, var->name_size);
+          } else {
+            grn_text_itoa(ctx, buf, (int)i);
+          }
+          break;
+        }
+      }
+      /* The loop ran to completion: the operand is not a variable. */
+      if (i == e->nvars) {
+        put_value(ctx, buf, code->value);
+      }
+    } else {
+      if (code->value) {
+        put_value(ctx, buf, code->value);
+        GRN_TEXT_PUTC(ctx, buf, ' ');
+      }
+      if ((size_t)code->op < n_opstrs) {
+        GRN_TEXT_PUTS(ctx, buf, opstrs[code->op]);
+      } else {
+        GRN_TEXT_PUTS(ctx, buf, "UNKNOWN_OP(");
+        grn_text_itoa(ctx, buf, (int)code->op);
+        GRN_TEXT_PUTC(ctx, buf, ')');
+      }
+    }
+  }
+  GRN_TEXT_PUTC(ctx, buf, '}');
+  return GRN_SUCCESS;
+}
#define EXPRLOG(name,expr) do {\
grn_obj strbuf;\
GRN_TEXT_INIT(&strbuf, 0);\
- grn_expr_inspect(ctx, &strbuf, (expr));\
+ grn_expr_inspect_internal(ctx, &strbuf, (expr));\
GRN_TEXT_PUTC(ctx, &strbuf, '\0');\
GRN_LOG(ctx, GRN_LOG_NOTICE, "%s=(%s)", (name), GRN_TEXT_VALUE(&strbuf));\
GRN_OBJ_FIN(ctx, &strbuf);\
} while (0)
+#endif
+
static void
-scan_info_put_index(grn_ctx *ctx, scan_info *si, grn_obj *index, uint32_t sid, int32_t weight)
+scan_info_put_index(grn_ctx *ctx, scan_info *si,
+ grn_obj *index, uint32_t sid, int32_t weight,
+ grn_obj *scorer,
+ grn_obj *scorer_args_expr,
+ uint32_t scorer_args_expr_offset)
{
GRN_PTR_PUT(ctx, &si->index, index);
GRN_UINT32_PUT(ctx, &si->wv, sid);
GRN_INT32_PUT(ctx, &si->wv, weight);
+ GRN_PTR_PUT(ctx, &si->scorers, scorer);
+ GRN_PTR_PUT(ctx, &si->scorer_args_exprs, scorer_args_expr);
+ GRN_UINT32_PUT(ctx, &si->scorer_args_expr_offsets, scorer_args_expr_offset);
{
int i, ni = (GRN_BULK_VSIZE(&si->index) / sizeof(grn_obj *)) - 1;
grn_obj **pi = &GRN_PTR_VALUE_AT(&si->index, ni);
@@ -4191,10 +3811,10 @@ scan_info_put_index(grn_ctx *ctx, scan_info *si, grn_obj *index, uint32_t sid, i
if (index == pi[-1]) {
if (i) {
int32_t *pw = &GRN_INT32_VALUE_AT(&si->wv, (ni - i) * 2);
- memmove(pw + 2, pw, sizeof(int32_t) * 2 * i);
+ grn_memmove(pw + 2, pw, sizeof(int32_t) * 2 * i);
pw[0] = (int32_t) sid;
pw[1] = weight;
- memmove(pi + 1, pi, sizeof(grn_obj *) * i);
+ grn_memmove(pi + 1, pi, sizeof(grn_obj *) * i);
pi[0] = index;
}
return;
@@ -4204,10 +3824,13 @@ scan_info_put_index(grn_ctx *ctx, scan_info *si, grn_obj *index, uint32_t sid, i
}
static int32_t
-get_weight(grn_ctx *ctx, grn_expr_code *ec)
+get_weight(grn_ctx *ctx, grn_expr_code *ec, uint32_t *offset)
{
if (ec->modify == 2 && ec[2].op == GRN_OP_STAR &&
ec[1].value && ec[1].value->header.type == GRN_BULK) {
+ if (offset) {
+ *offset = 2;
+ }
if (ec[1].value->header.domain == GRN_DB_INT32 ||
ec[1].value->header.domain == GRN_DB_UINT32) {
return GRN_INT32_VALUE(ec[1].value);
@@ -4222,6 +3845,9 @@ get_weight(grn_ctx *ctx, grn_expr_code *ec)
return weight;
}
} else {
+ if (offset) {
+ *offset = 0;
+ }
return 1;
}
}
@@ -4243,6 +3869,9 @@ grn_scan_info_open(grn_ctx *ctx, int start)
si->max_interval = DEFAULT_MAX_INTERVAL;
si->similarity_threshold = DEFAULT_SIMILARITY_THRESHOLD;
si->start = start;
+ GRN_PTR_INIT(&si->scorers, GRN_OBJ_VECTOR, GRN_ID_NIL);
+ GRN_PTR_INIT(&si->scorer_args_exprs, GRN_OBJ_VECTOR, GRN_ID_NIL);
+ GRN_UINT32_INIT(&si->scorer_args_expr_offsets, GRN_OBJ_VECTOR);
return si;
}
@@ -4254,9 +3883,16 @@ grn_scan_info_close(grn_ctx *ctx, scan_info *si)
}
+/*
+ * Non-static entry point for scan_info_put_index(): every argument,
+ * including the scorer, the scorer arguments expression and its offset,
+ * is forwarded unchanged.
+ */
void
-grn_scan_info_put_index(grn_ctx *ctx, scan_info *si, grn_obj *index, uint32_t sid, int32_t weight)
+grn_scan_info_put_index(grn_ctx *ctx, scan_info *si,
+ grn_obj *index, uint32_t sid, int32_t weight,
+ grn_obj *scorer,
+ grn_obj *scorer_args_expr,
+ uint32_t scorer_args_expr_offset)
{
- scan_info_put_index(ctx, si, index, sid, weight);
+ scan_info_put_index(ctx, si, index, sid, weight,
+ scorer,
+ scorer_args_expr,
+ scorer_args_expr_offset);
}
scan_info **
@@ -4267,9 +3903,9 @@ grn_scan_info_put_logical_op(grn_ctx *ctx, scan_info **sis, int *ip,
}
+/*
+ * Non-static entry point for get_weight(): returns its result unchanged.
+ * offset is passed straight through; get_weight() only writes to it when
+ * it is non-NULL.
+ */
int32_t
-grn_expr_code_get_weight(grn_ctx *ctx, grn_expr_code *ec)
+grn_expr_code_get_weight(grn_ctx *ctx, grn_expr_code *ec, uint32_t *offset)
{
- return get_weight(ctx, ec);
+ return get_weight(ctx, ec, offset);
}
int
@@ -4347,7 +3983,7 @@ grn_scan_info_set_similarity_threshold(scan_info *si, int similarity_threshold)
grn_bool
grn_scan_info_push_arg(scan_info *si, grn_obj *arg)
{
- if (si->nargs >= 8) {
+ if (si->nargs >= GRN_SCAN_INFO_MAX_N_ARGS) {
return GRN_FALSE;
}
@@ -4365,36 +4001,324 @@ grn_scan_info_get_arg(grn_ctx *ctx, scan_info *si, int i)
}
static uint32_t
-scan_info_build_find_index_column_index(grn_ctx *ctx,
- scan_info *si,
- grn_expr_code *ec,
- uint32_t n_rest_codes,
- grn_operator op)
+scan_info_build_match_expr_codes_find_index(grn_ctx *ctx, scan_info *si,
+ grn_expr *expr, uint32_t i,
+ grn_obj **index,
+ int *sid)
{
- uint32_t offset = 0;
- grn_obj *index;
+ grn_expr_code *ec;
+ uint32_t offset = 1;
+ grn_index_datum index_datum;
+ unsigned int n_index_data = 0;
+
+ ec = &(expr->codes[i]);
+ switch (ec->value->header.type) {
+ case GRN_ACCESSOR :
+ n_index_data = grn_column_find_index_data(ctx, ec->value, si->op,
+ &index_datum, 1);
+ if (n_index_data > 0) {
+ grn_accessor *a = (grn_accessor *)(ec->value);
+ *sid = index_datum.section;
+ if (a->next && a->obj != index_datum.index) {
+ *index = ec->value;
+ } else {
+ *index = index_datum.index;
+ }
+ }
+ break;
+ case GRN_COLUMN_FIX_SIZE :
+ case GRN_COLUMN_VAR_SIZE :
+ n_index_data = grn_column_find_index_data(ctx, ec->value, si->op,
+ &index_datum, 1);
+ if (n_index_data > 0) {
+ *index = index_datum.index;
+ *sid = index_datum.section;
+ }
+ break;
+ case GRN_COLUMN_INDEX :
+ {
+ uint32_t n_rest_codes;
+
+ *index = ec->value;
+
+ n_rest_codes = expr->codes_curr - i;
+ if (n_rest_codes >= 2 &&
+ ec[1].value &&
+ (ec[1].value->header.domain == GRN_DB_INT32 ||
+ ec[1].value->header.domain == GRN_DB_UINT32) &&
+ ec[2].op == GRN_OP_GET_MEMBER) {
+ if (ec[1].value->header.domain == GRN_DB_INT32) {
+ *sid = GRN_INT32_VALUE(ec[1].value) + 1;
+ } else {
+ *sid = GRN_UINT32_VALUE(ec[1].value) + 1;
+ }
+ offset += 2;
+ }
+ }
+ break;
+ default :
+ break;
+ }
+
+ return offset;
+}
+
+/*
+ * Handles the match target that starts at expr->codes[i] and returns the
+ * position at which the caller should continue.
+ *
+ * - A code without a value is skipped.
+ * - A column, accessor or index column is resolved to an index and, when
+ *   one is found, registered in si with the weight reported by
+ *   get_weight() and no scorer.
+ * - A proc must be a scorer; the code after it is resolved as the target
+ *   and registered together with the scorer, this expression and the
+ *   position of the scorer's extra arguments.
+ * - Any other value type is reported as an invalid match target.
+ *
+ * After reporting an error the function returns expr->codes_curr.
+ */
+static uint32_t
+scan_info_build_match_expr_codes(grn_ctx *ctx, scan_info *si,
+ grn_expr *expr, uint32_t i)
+{
+ grn_expr_code *ec;
+ grn_obj *index = NULL;
int sid = 0;
- int32_t weight = 0;
-
- index = ec->value;
- if (n_rest_codes > 2 &&
- ec[1].value &&
- (ec[1].value->header.domain == GRN_DB_INT32 ||
- ec[1].value->header.domain == GRN_DB_UINT32) &&
- ec[2].op == GRN_OP_GET_MEMBER) {
- if (ec[1].value->header.domain == GRN_DB_INT32) {
- sid = GRN_INT32_VALUE(ec[1].value) + 1;
+ uint32_t offset = 0;
+
+ ec = &(expr->codes[i]);
+ if (!ec->value) {
+ return i + 1;
+ }
+
+ switch (ec->value->header.type) {
+ case GRN_ACCESSOR :
+ case GRN_COLUMN_FIX_SIZE :
+ case GRN_COLUMN_VAR_SIZE :
+ case GRN_COLUMN_INDEX :
+ offset = scan_info_build_match_expr_codes_find_index(ctx, si, expr, i,
+ &index, &sid);
+ /* Move to the last code consumed while resolving the index;
+ get_weight() inspects the codes that follow it. */
+ i += offset - 1;
+ if (index) {
+ if (ec->value->header.type == GRN_ACCESSOR) {
+ si->flags |= SCAN_ACCESSOR;
+ }
+ scan_info_put_index(ctx, si, index, sid,
+ get_weight(ctx, &(expr->codes[i]), &offset),
+ NULL, NULL, 0);
+ /* get_weight() stored how many weight codes to skip (0 or 2). */
+ i += offset;
+ }
+ break;
+ case GRN_PROC :
+ if (!grn_obj_is_scorer_proc(ctx, ec->value)) {
+ grn_obj inspected;
+ GRN_TEXT_INIT(&inspected, 0);
+ grn_inspect(ctx, &inspected, ec->value);
+ ERR(GRN_INVALID_ARGUMENT,
+ "procedure must be scorer: <%.*s>",
+ (int)GRN_TEXT_LEN(&inspected),
+ GRN_TEXT_VALUE(&inspected));
+ GRN_OBJ_FIN(ctx, &inspected);
+ return expr->codes_curr;
+ }
+ /* The code after the scorer proc is treated as the match target. */
+ i++;
+ offset = scan_info_build_match_expr_codes_find_index(ctx, si, expr, i,
+ &index, &sid);
+ i += offset;
+ if (index) {
+ /* 0 means the call follows the target directly; otherwise this is
+ the position of the first code between the target and the call. */
+ uint32_t scorer_args_expr_offset = 0;
+ if (expr->codes[i].op != GRN_OP_CALL) {
+ scorer_args_expr_offset = i;
+ }
+ /* Skip forward to the GRN_OP_CALL code (or the end of the codes). */
+ while (i < expr->codes_curr && expr->codes[i].op != GRN_OP_CALL) {
+ i++;
+ }
+ scan_info_put_index(ctx, si, index, sid,
+ get_weight(ctx, &(expr->codes[i]), &offset),
+ ec->value,
+ (grn_obj *)expr,
+ scorer_args_expr_offset);
+ i += offset;
+ }
+ break;
+ default :
+ {
+ char name[GRN_TABLE_MAX_KEY_SIZE];
+ int name_size;
+ name_size = grn_obj_name(ctx, ec->value, name, GRN_TABLE_MAX_KEY_SIZE);
+ ERR(GRN_INVALID_ARGUMENT,
+ "invalid match target: <%.*s>",
+ name_size, name);
+ return expr->codes_curr;
+ }
+ break;
+ }
+
+ return i + 1;
+}
+
+/*
+ * Walks every code of a match-target expression. Each call to
+ * scan_info_build_match_expr_codes() handles one target and returns the
+ * position of the next one; a return value of expr->codes_curr or more
+ * ends the walk.
+ */
+static void
+scan_info_build_match_expr(grn_ctx *ctx, scan_info *si, grn_expr *expr)
+{
+  uint32_t next;
+
+  for (next = 0; next < expr->codes_curr; ) {
+    next = scan_info_build_match_expr_codes(ctx, si, expr, next);
+  }
+}
+
+/*
+ * Reports whether the pattern held in regexp passes this function's scan
+ * for index-searchable patterns.
+ *
+ * Returns GRN_FALSE when:
+ * - regexp's domain is not ShortText, Text or LongText;
+ * - grn_charlen() returns 0 for some character;
+ * - an unescaped single-byte character is one of . [ ] | ? + * { } ^ $ ( );
+ * - a backslash is followed by one of the letters or digits listed in the
+ *   first switch below.
+ * Returns GRN_TRUE otherwise.
+ */
+static grn_bool
+is_index_searchable_regexp(grn_ctx *ctx, grn_obj *regexp)
+{
+ const char *regexp_raw;
+ const char *regexp_raw_end;
+ /* GRN_TRUE while the previous character was an unescaped backslash. */
+ grn_bool escaping = GRN_FALSE;
+
+ if (!(regexp->header.domain == GRN_DB_SHORT_TEXT ||
+ regexp->header.domain == GRN_DB_TEXT ||
+ regexp->header.domain == GRN_DB_LONG_TEXT)) {
+ return GRN_FALSE;
+ }
+
+ regexp_raw = GRN_TEXT_VALUE(regexp);
+ regexp_raw_end = regexp_raw + GRN_TEXT_LEN(regexp);
+
+ while (regexp_raw < regexp_raw_end) {
+ unsigned int char_len;
+
+ char_len = grn_charlen(ctx, regexp_raw, regexp_raw_end);
+ if (char_len == 0) {
+ return GRN_FALSE;
+ }
+
+ /* Only single-byte characters are checked against the two lists. */
+ if (char_len == 1) {
+ if (escaping) {
+ escaping = GRN_FALSE;
+ /* Backslash sequences that make the pattern unsearchable. */
+ switch (regexp_raw[0]) {
+ case 'Z' :
+ case 'b' :
+ case 'B' :
+ case 'd' :
+ case 'D' :
+ case 'h' :
+ case 'H' :
+ case 'p' :
+ case 's' :
+ case 'S' :
+ case 'w' :
+ case 'W' :
+ case 'X' :
+ case 'k' :
+ case 'g' :
+ case '1' :
+ case '2' :
+ case '3' :
+ case '4' :
+ case '5' :
+ case '6' :
+ case '7' :
+ case '8' :
+ case '9' :
+ return GRN_FALSE;
+ default :
+ break;
+ }
+ } else {
+ /* Unescaped characters that make the pattern unsearchable. */
+ switch (regexp_raw[0]) {
+ case '.' :
+ case '[' :
+ case ']' :
+ case '|' :
+ case '?' :
+ case '+' :
+ case '*' :
+ case '{' :
+ case '}' :
+ case '^' :
+ case '$' :
+ case '(' :
+ case ')' :
+ escaping = GRN_FALSE;
+ return GRN_FALSE;
+ case '\\' :
+ escaping = GRN_TRUE;
+ break;
+ default :
+ escaping = GRN_FALSE;
+ break;
+ }
+ }
} else {
- sid = GRN_UINT32_VALUE(ec[1].value) + 1;
+ /* A multi-byte character ends any pending backslash escape. */
+ escaping = GRN_FALSE;
}
- offset = 2;
- weight = get_weight(ctx, ec + offset);
- } else {
- weight = get_weight(ctx, ec);
+
+ regexp_raw += char_len;
}
- scan_info_put_index(ctx, si, index, sid, weight);
- return offset;
+ return GRN_TRUE;
+}
+
+/*
+ * Fills si->index, si->query and the NEAR/SIMILAR options from the
+ * arguments collected in si->args.
+ *
+ * For GRN_OP_REGEXP nothing is registered at all when any bulk argument
+ * fails is_index_searchable_regexp().
+ *
+ * Each argument is then classified:
+ * - an expression is expanded by scan_info_build_match_expr();
+ * - a database object or an accessor is resolved to an index with
+ *   grn_column_find_index_data() and registered with weight 1 and no
+ *   scorer (accessors also set SCAN_ACCESSOR);
+ * - anything else becomes si->query, except that an Int32 third argument
+ *   of NEAR/NEAR2 sets max_interval and of SIMILAR sets
+ *   similarity_threshold.
+ */
+static void
+scan_info_build_match(grn_ctx *ctx, scan_info *si)
+{
+  grn_obj **arg;
+  grn_obj **args_end = si->args + si->nargs;
+
+  if (si->op == GRN_OP_REGEXP) {
+    for (arg = si->args; arg < args_end; arg++) {
+      if ((*arg)->header.type != GRN_BULK) {
+        continue;
+      }
+      if (!is_index_searchable_regexp(ctx, *arg)) {
+        return;
+      }
+    }
+  }
+
+  for (arg = si->args; arg < args_end; arg++) {
+    grn_obj *target = *arg;
+
+    if (target->header.type == GRN_EXPR) {
+      scan_info_build_match_expr(ctx, si, (grn_expr *)target);
+      continue;
+    }
+
+    if (GRN_DB_OBJP(target)) {
+      grn_index_datum datum;
+      if (grn_column_find_index_data(ctx, target, si->op, &datum, 1) > 0) {
+        scan_info_put_index(ctx, si,
+                            datum.index, datum.section, 1,
+                            NULL, NULL, 0);
+      }
+      continue;
+    }
+
+    if (GRN_ACCESSORP(target)) {
+      grn_index_datum datum;
+      si->flags |= SCAN_ACCESSOR;
+      if (grn_column_find_index_data(ctx, target, si->op, &datum, 1) > 0) {
+        /* A chained accessor is registered as is; a single-step one is
+           replaced by the index that was found. */
+        grn_obj *index =
+          ((grn_accessor *)target)->next ? target : datum.index;
+        scan_info_put_index(ctx, si,
+                            index, datum.section, 1,
+                            NULL, NULL, 0);
+      }
+      continue;
+    }
+
+    {
+      /* True when this argument is the Int32 third argument of the call. */
+      grn_bool is_int32_third_arg =
+        (si->nargs == 3 &&
+         target == si->args[2] &&
+         target->header.domain == GRN_DB_INT32);
+
+      if (!is_int32_third_arg) {
+        si->query = target;
+      } else if (si->op == GRN_OP_NEAR || si->op == GRN_OP_NEAR2) {
+        si->max_interval = GRN_INT32_VALUE(target);
+      } else if (si->op == GRN_OP_SIMILAR) {
+        si->similarity_threshold = GRN_INT32_VALUE(target);
+      } else {
+        si->query = target;
+      }
+    }
+  }
}
static scan_info **
@@ -4431,10 +4355,26 @@ scan_info_build(grn_ctx *ctx, grn_obj *expr, int *n,
case GRN_OP_GEO_WITHINP6 :
case GRN_OP_GEO_WITHINP8 :
case GRN_OP_TERM_EXTRACT :
+ case GRN_OP_REGEXP :
if (stat < SCAN_COL1 || SCAN_CONST < stat) { return NULL; }
stat = SCAN_START;
m++;
break;
+ case GRN_OP_BITWISE_OR :
+ case GRN_OP_BITWISE_XOR :
+ case GRN_OP_BITWISE_AND :
+ case GRN_OP_BITWISE_NOT :
+ case GRN_OP_SHIFTL :
+ case GRN_OP_SHIFTR :
+ case GRN_OP_SHIFTRR :
+ case GRN_OP_PLUS :
+ case GRN_OP_MINUS :
+ case GRN_OP_STAR :
+ case GRN_OP_MOD :
+ if (stat < SCAN_COL1 || SCAN_CONST < stat) { return NULL; }
+ stat = SCAN_START;
+ if (m != o + 1) { return NULL; }
+ break;
case GRN_OP_AND :
case GRN_OP_OR :
case GRN_OP_AND_NOT :
@@ -4496,90 +4436,17 @@ scan_info_build(grn_ctx *ctx, grn_obj *expr, int *n,
case GRN_OP_GEO_WITHINP6 :
case GRN_OP_GEO_WITHINP8 :
case GRN_OP_TERM_EXTRACT :
+ case GRN_OP_REGEXP :
stat = SCAN_START;
si->op = c->op;
si->end = c - e->codes;
sis[i++] = si;
- {
- int sid;
- grn_obj *index, **p = si->args, **pe = si->args + si->nargs;
- for (; p < pe; p++) {
- if ((*p)->header.type == GRN_EXPR) {
- uint32_t j;
- grn_expr_code *ec;
- grn_expr *e = (grn_expr *)(*p);
- for (j = e->codes_curr, ec = e->codes; j--; ec++) {
- if (ec->value) {
- switch (ec->value->header.type) {
- case GRN_ACCESSOR :
- if (grn_column_index(ctx, ec->value, c->op, &index, 1, &sid)) {
- int32_t weight = get_weight(ctx, ec);
- si->flags |= SCAN_ACCESSOR;
- if (((grn_accessor *)ec->value)->next) {
- scan_info_put_index(ctx, si, ec->value, sid, weight);
- } else {
- scan_info_put_index(ctx, si, index, sid, weight);
- }
- }
- break;
- case GRN_COLUMN_FIX_SIZE :
- case GRN_COLUMN_VAR_SIZE :
- if (grn_column_index(ctx, ec->value, c->op, &index, 1, &sid)) {
- scan_info_put_index(ctx, si, index, sid, get_weight(ctx, ec));
- }
- break;
- case GRN_COLUMN_INDEX :
- {
- uint32_t offset;
- offset = scan_info_build_find_index_column_index(ctx, si, ec,
- j, c->op);
- j -= offset;
- ec += offset;
- }
- break;
- }
- }
- }
- } else if (GRN_DB_OBJP(*p)) {
- if (grn_column_index(ctx, *p, c->op, &index, 1, &sid)) {
- scan_info_put_index(ctx, si, index, sid, 1);
- }
- } else if (GRN_ACCESSORP(*p)) {
- si->flags |= SCAN_ACCESSOR;
- if (grn_column_index(ctx, *p, c->op, &index, 1, &sid)) {
- if (((grn_accessor *)(*p))->next) {
- scan_info_put_index(ctx, si, *p, sid, 1);
- } else {
- scan_info_put_index(ctx, si, index, sid, 1);
- }
- }
- } else {
- switch (c->op) {
- case GRN_OP_NEAR :
- case GRN_OP_NEAR2 :
- if (si->nargs == 3 &&
- *p == si->args[2] &&
- (*p)->header.domain == GRN_DB_INT32) {
- si->max_interval = GRN_INT32_VALUE(*p);
- } else {
- si->query = *p;
- }
- break;
- case GRN_OP_SIMILAR :
- if (si->nargs == 3 &&
- *p == si->args[2] &&
- (*p)->header.domain == GRN_DB_INT32) {
- si->similarity_threshold = GRN_INT32_VALUE(*p);
- } else {
- si->query = *p;
- }
- break;
- default :
- si->query = *p;
- break;
- }
- }
- }
+ scan_info_build_match(ctx, si);
+ if (ctx->rc != GRN_SUCCESS) {
+ int j;
+ for (j = 0; j < i; j++) { SI_FREE(sis[j]); }
+ GRN_FREE(sis);
+ return NULL;
}
si = NULL;
break;
@@ -4595,7 +4462,7 @@ scan_info_build(grn_ctx *ctx, grn_obj *expr, int *n,
if (c->value == var) {
stat = SCAN_VAR;
} else {
- if (si->nargs < 8) {
+ if (si->nargs < GRN_SCAN_INFO_MAX_N_ARGS) {
si->args[si->nargs++] = c->value;
}
if (stat == SCAN_START) { si->flags |= SCAN_PRE_CONST; }
@@ -4610,7 +4477,7 @@ scan_info_build(grn_ctx *ctx, grn_obj *expr, int *n,
case SCAN_CONST :
case SCAN_VAR :
stat = SCAN_COL1;
- if (si->nargs < 8) {
+ if (si->nargs < GRN_SCAN_INFO_MAX_N_ARGS) {
si->args[si->nargs++] = c->value;
}
break;
@@ -4628,6 +4495,7 @@ scan_info_build(grn_ctx *ctx, grn_obj *expr, int *n,
"invalid expression: can't use column as a value: %.*s",
(int)GRN_TEXT_LEN(&inspected), GRN_TEXT_VALUE(&inspected));
GRN_OBJ_FIN(ctx, &inspected);
+ SI_FREE(si);
for (j = 0; j < i; j++) { SI_FREE(sis[j]); }
GRN_FREE(sis);
return NULL;
@@ -4649,17 +4517,28 @@ scan_info_build(grn_ctx *ctx, grn_obj *expr, int *n,
sis[i++] = si;
/* better index resolving framework for functions should be implemented */
{
- int sid;
- grn_obj *index, **p = si->args, **pe = si->args + si->nargs;
+ grn_obj **p = si->args, **pe = si->args + si->nargs;
for (; p < pe; p++) {
if (GRN_DB_OBJP(*p)) {
- if (grn_column_index(ctx, *p, c->op, &index, 1, &sid)) {
- scan_info_put_index(ctx, si, index, sid, 1);
+ grn_index_datum index_datum;
+ unsigned int n_index_data;
+ n_index_data = grn_column_find_index_data(ctx, *p, c->op,
+ &index_datum, 1);
+ if (n_index_data > 0) {
+ scan_info_put_index(ctx, si,
+ index_datum.index, index_datum.section, 1,
+ NULL, NULL, 0);
}
} else if (GRN_ACCESSORP(*p)) {
+ grn_index_datum index_datum;
+ unsigned int n_index_data;
si->flags |= SCAN_ACCESSOR;
- if (grn_column_index(ctx, *p, c->op, &index, 1, &sid)) {
- scan_info_put_index(ctx, si, index, sid, 1);
+ n_index_data = grn_column_find_index_data(ctx, *p, c->op,
+ &index_datum, 1);
+ if (n_index_data > 0) {
+ scan_info_put_index(ctx, si,
+ index_datum.index, index_datum.section, 1,
+ NULL, NULL, 0);
}
} else {
si->query = *p;
@@ -4694,6 +4573,43 @@ scan_info_build(grn_ctx *ctx, grn_obj *expr, int *n,
return sis;
}
+/*
+ * Appends a human-readable description of the first n entries of sis to
+ * buffer. For each entry it writes the position in the list, the operator,
+ * the logical operator, the inspected query object and the start..end
+ * range stored in the scan_info.
+ */
+void
+grn_inspect_scan_info_list(grn_ctx *ctx, grn_obj *buffer, scan_info **sis, int n)
+{
+  int i;
+
+  for (i = 0; i < n; i++) {
+    scan_info *si = sis[i];
+
+    grn_text_printf(ctx, buffer, "[%d]\n", i);
+    grn_text_printf(ctx, buffer,
+                    " op: <%s>\n",
+                    grn_operator_to_string(si->op));
+    grn_text_printf(ctx, buffer,
+                    " logical_op: <%s>\n",
+                    grn_operator_to_string(si->logical_op));
+
+    GRN_TEXT_PUTS(ctx, buffer, " query: <");
+    grn_inspect(ctx, buffer, si->query);
+    GRN_TEXT_PUTS(ctx, buffer, ">\n");
+
+    /* start and end are uint32_t, so they are printed as unsigned. */
+    grn_text_printf(ctx, buffer,
+                    " expr: <%u..%u>\n",
+                    (unsigned int)si->start, (unsigned int)si->end);
+  }
+}
+
+/*
+ * Debug helper: renders the scan_info list with
+ * grn_inspect_scan_info_list() into a temporary text buffer, prints it
+ * followed by a newline to standard output, then releases the buffer.
+ */
+void
+grn_p_scan_info_list(grn_ctx *ctx, scan_info **sis, int n)
+{
+  grn_obj dump;
+
+  GRN_TEXT_INIT(&dump, 0);
+  grn_inspect_scan_info_list(ctx, &dump, sis, n);
+  {
+    int dump_length = (int)GRN_TEXT_LEN(&dump);
+    const char *dump_text = GRN_TEXT_VALUE(&dump);
+
+    printf("%.*s\n", dump_length, dump_text);
+  }
+  GRN_OBJ_FIN(ctx, &dump);
+}
+
inline static int32_t
exec_result_to_score(grn_ctx *ctx, grn_obj *result, grn_obj *score_buffer)
{
@@ -4719,8 +4635,8 @@ exec_result_to_score(grn_ctx *ctx, grn_obj *result, grn_obj *score_buffer)
}
static void
-grn_table_select_(grn_ctx *ctx, grn_obj *table, grn_obj *expr, grn_obj *v,
- grn_obj *res, grn_operator op)
+grn_table_select_sequential(grn_ctx *ctx, grn_obj *table, grn_obj *expr,
+ grn_obj *v, grn_obj *res, grn_operator op)
{
int32_t score;
grn_id id, *idp;
@@ -4737,6 +4653,9 @@ grn_table_select_(grn_ctx *ctx, grn_obj *table, grn_obj *expr, grn_obj *v,
while ((id = grn_table_cursor_next(ctx, tc))) {
GRN_RECORD_SET(ctx, v, id);
r = grn_expr_exec(ctx, expr, 0);
+ if (ctx->rc) {
+ break;
+ }
score = exec_result_to_score(ctx, r, &score_buffer);
if (score > 0) {
grn_rset_recinfo *ri;
@@ -4754,6 +4673,9 @@ grn_table_select_(grn_ctx *ctx, grn_obj *table, grn_obj *expr, grn_obj *v,
grn_hash_cursor_get_key(ctx, hc, (void **) &idp);
GRN_RECORD_SET(ctx, v, *idp);
r = grn_expr_exec(ctx, expr, 0);
+ if (ctx->rc) {
+ break;
+ }
score = exec_result_to_score(ctx, r, &score_buffer);
if (score > 0) {
grn_rset_recinfo *ri;
@@ -4772,6 +4694,9 @@ grn_table_select_(grn_ctx *ctx, grn_obj *table, grn_obj *expr, grn_obj *v,
grn_hash_cursor_get_key(ctx, hc, (void **) &idp);
GRN_RECORD_SET(ctx, v, *idp);
r = grn_expr_exec(ctx, expr, 0);
+ if (ctx->rc) {
+ break;
+ }
score = exec_result_to_score(ctx, r, &score_buffer);
if (score > 0) {
grn_hash_cursor_delete(ctx, hc, NULL);
@@ -4786,6 +4711,9 @@ grn_table_select_(grn_ctx *ctx, grn_obj *table, grn_obj *expr, grn_obj *v,
grn_hash_cursor_get_key(ctx, hc, (void **) &idp);
GRN_RECORD_SET(ctx, v, *idp);
r = grn_expr_exec(ctx, expr, 0);
+ if (ctx->rc) {
+ break;
+ }
score = exec_result_to_score(ctx, r, &score_buffer);
if (score > 0) {
grn_rset_recinfo *ri;
@@ -4854,13 +4782,29 @@ grn_table_select_index_range_column(grn_ctx *ctx, grn_obj *table,
min, min_size, max, max_size,
offset, limit, flags);
if (cursor) {
- grn_id index_id;
- while ((index_id = grn_table_cursor_next(ctx, cursor))) {
- grn_ii_at(ctx, (grn_ii *)index, index_id,
- (grn_hash *)res, logical_op);
+ uint32_t sid;
+ int32_t weight;
+ grn_obj *index_cursor;
+
+ sid = GRN_UINT32_VALUE_AT(&(si->wv), 0);
+ weight = GRN_INT32_VALUE_AT(&(si->wv), 1);
+ index_cursor = grn_index_cursor_open(ctx, cursor, index,
+ GRN_ID_NIL, GRN_ID_MAX, 0);
+ if (index_cursor) {
+ grn_posting *posting;
+ while ((posting = grn_index_cursor_next(ctx, index_cursor, NULL))) {
+ if (sid == 0 || posting->sid == sid) {
+ grn_ii_posting ii_posting;
+ ii_posting.rid = posting->rid;
+ ii_posting.sid = posting->sid;
+ ii_posting.weight = posting->weight * weight;
+ grn_ii_posting_add(ctx, &ii_posting, (grn_hash *)res, logical_op);
+ }
+ }
+ processed = GRN_TRUE;
+ grn_obj_unlink(ctx, index_cursor);
}
grn_table_cursor_close(ctx, cursor);
- processed = GRN_TRUE;
}
grn_ii_resolve_sel_and(ctx, (grn_hash *)res, logical_op);
@@ -4913,11 +4857,21 @@ grn_table_select_index_range_accessor(grn_ctx *ctx, grn_obj *table,
{
int i;
+ grn_obj weight_vector;
+ grn_search_optarg optarg;
+ GRN_INT32_INIT(&weight_vector, GRN_OBJ_VECTOR);
+ memset(&optarg, 0, sizeof(grn_search_optarg));
+ if (si->op == GRN_OP_MATCH) {
+ optarg.mode = GRN_OP_EXACT;
+ } else {
+ optarg.mode = si->op;
+ }
for (i = n_accessors - 1; i > 0; i--) {
grn_rc rc = GRN_SUCCESS;
grn_accessor *accessor;
grn_obj *index;
+ int section;
grn_obj *domain;
grn_obj *target;
grn_obj *next_res;
@@ -4926,10 +4880,34 @@ grn_table_select_index_range_accessor(grn_ctx *ctx, grn_obj *table,
accessor = (grn_accessor *)GRN_PTR_VALUE_AT(accessor_stack, i - 1);
target = accessor->obj;
- if (grn_column_index(ctx, target, GRN_OP_EQUAL, &index, 1, NULL) == 0) {
- grn_obj_unlink(ctx, current_res);
- current_res = NULL;
- break;
+ {
+ grn_index_datum index_datum;
+ unsigned int n_index_data;
+ n_index_data = grn_column_find_index_data(ctx, target, GRN_OP_EQUAL,
+ &index_datum, 1);
+ if (n_index_data == 0) {
+ grn_obj_unlink(ctx, current_res);
+ current_res = NULL;
+ break;
+ }
+ index = index_datum.index;
+ section = index_datum.section;
+ }
+
+ if (section > 0) {
+ int j;
+ int weight_position = section - 1;
+
+ GRN_BULK_REWIND(&weight_vector);
+ GRN_INT32_SET_AT(ctx, &weight_vector, weight_position, 1);
+ optarg.weight_vector = &(GRN_INT32_VALUE(&weight_vector));
+ optarg.vector_size = GRN_BULK_VSIZE(&weight_vector) / sizeof(int32_t);
+ for (j = 0; j < weight_position - 1; j++) {
+ optarg.weight_vector[j] = 0;
+ }
+ } else {
+ optarg.weight_vector = NULL;
+ optarg.vector_size = 1;
}
{
@@ -4954,14 +4932,14 @@ grn_table_select_index_range_accessor(grn_ctx *ctx, grn_obj *table,
if (domain->header.type == GRN_TABLE_NO_KEY) {
rc = grn_ii_sel(ctx, (grn_ii *)index,
(const char *)next_record_id, sizeof(grn_id),
- (grn_hash *)next_res, next_op, NULL);
+ (grn_hash *)next_res, next_op, &optarg);
} else {
char key[GRN_TABLE_MAX_KEY_SIZE];
int key_len;
key_len = grn_table_get_key(ctx, domain, *next_record_id,
key, GRN_TABLE_MAX_KEY_SIZE);
rc = grn_ii_sel(ctx, (grn_ii *)index, key, key_len,
- (grn_hash *)next_res, next_op, NULL);
+ (grn_hash *)next_res, next_op, &optarg);
}
if (rc != GRN_SUCCESS) {
break;
@@ -4986,6 +4964,7 @@ grn_table_select_index_range_accessor(grn_ctx *ctx, grn_obj *table,
break;
}
}
+ GRN_OBJ_FIN(ctx, &weight_vector);
}
return current_res == res;
@@ -5085,7 +5064,10 @@ grn_table_select_index(grn_ctx *ctx, grn_obj *table, scan_info *si,
GRN_BULK_HEAD(si->query),
GRN_BULK_VSIZE(si->query));
}
- grn_ii_at(ctx, (grn_ii *)index, tid, (grn_hash *)res, si->logical_op);
+ if (tid != GRN_ID_NIL) {
+ grn_ii_at(ctx, (grn_ii *)index, tid, (grn_hash *)res,
+ si->logical_op);
+ }
}
grn_ii_resolve_sel_and(ctx, (grn_hash *)res, si->logical_op);
processed = GRN_TRUE;
@@ -5173,9 +5155,11 @@ grn_table_select_index(grn_ctx *ctx, grn_obj *table, scan_info *si,
case GRN_OP_NEAR :
case GRN_OP_NEAR2 :
case GRN_OP_SIMILAR :
+ case GRN_OP_REGEXP :
{
grn_obj wv, **ip = &GRN_PTR_VALUE(&si->index);
- int j = GRN_BULK_VSIZE(&si->index)/sizeof(grn_obj *);
+ int j;
+ int n_indexes = GRN_BULK_VSIZE(&si->index)/sizeof(grn_obj *);
int32_t *wp = &GRN_INT32_VALUE(&si->wv);
grn_search_optarg optarg;
GRN_INT32_INIT(&wv, GRN_OBJ_VECTOR);
@@ -5203,19 +5187,30 @@ grn_table_select_index(grn_ctx *ctx, grn_obj *table, scan_info *si,
optarg.proc = NULL;
optarg.max_size = 0;
ctx->flags |= GRN_CTX_TEMPORARY_DISABLE_II_RESOLVE_SEL_AND;
- for (; j--; ip++, wp += 2) {
+ for (j = 0; j < n_indexes; j++, ip++, wp += 2) {
uint32_t sid = (uint32_t) wp[0];
int32_t weight = wp[1];
if (sid) {
int weight_index = sid - 1;
- GRN_INT32_SET_AT(ctx, &wv, weight_index, weight);
+ int current_vector_size;
+ current_vector_size = GRN_BULK_VSIZE(&wv)/sizeof(int32_t);
+ if (weight_index < current_vector_size) {
+ ((int *)GRN_BULK_HEAD(&wv))[weight_index] = weight;
+ } else {
+ GRN_INT32_SET_AT(ctx, &wv, weight_index, weight);
+ }
optarg.weight_vector = &GRN_INT32_VALUE(&wv);
optarg.vector_size = GRN_BULK_VSIZE(&wv)/sizeof(int32_t);
} else {
optarg.weight_vector = NULL;
optarg.vector_size = weight;
}
- if (j) {
+ optarg.scorer = GRN_PTR_VALUE_AT(&(si->scorers), j);
+ optarg.scorer_args_expr =
+ GRN_PTR_VALUE_AT(&(si->scorer_args_exprs), j);
+ optarg.scorer_args_expr_offset =
+ GRN_UINT32_VALUE_AT(&(si->scorer_args_expr_offsets), j);
+ if (j < n_indexes - 1) {
if (sid && ip[0] == ip[1]) { continue; }
} else {
ctx->flags &= ~GRN_CTX_TEMPORARY_DISABLE_II_RESOLVE_SEL_AND;
@@ -5250,7 +5245,7 @@ grn_table_select_index(grn_ctx *ctx, grn_obj *table, scan_info *si,
}
break;
case GRN_OP_CALL :
- if (selector_proc_p(si->args[0])) {
+ if (grn_obj_is_selector_proc(ctx, si->args[0])) {
grn_rc rc;
grn_proc *proc = (grn_proc *)(si->args[0]);
rc = proc->selector(ctx, table, index, si->nargs, si->args,
@@ -5276,7 +5271,7 @@ grn_table_select_index(grn_ctx *ctx, grn_obj *table, scan_info *si,
} else {
switch (si->op) {
case GRN_OP_CALL :
- if (selector_proc_p(si->args[0])) {
+ if (grn_obj_is_selector_proc(ctx, si->args[0])) {
grn_rc rc;
grn_proc *proc = (grn_proc *)(si->args[0]);
rc = proc->selector(ctx, table, NULL, si->nargs, si->args,
@@ -5361,11 +5356,13 @@ grn_table_select(grn_ctx *ctx, grn_obj *table, grn_obj *expr,
if (ctx->rc) { break; }
e->codes = codes + si->start;
e->codes_curr = si->end - si->start + 1;
- grn_table_select_(ctx, table, expr, v, res, si->logical_op);
+ grn_table_select_sequential(ctx, table, expr, v,
+ res, si->logical_op);
}
}
GRN_QUERY_LOG(ctx, GRN_QUERY_LOG_SIZE,
":", "filter(%d)", grn_table_size(ctx, res));
+ if (ctx->rc) { break; }
}
for (i = 0; i < n; i++) {
scan_info *si = sis[i];
@@ -5377,7 +5374,7 @@ grn_table_select(grn_ctx *ctx, grn_obj *table, grn_obj *expr,
e->codes_curr = codes_curr;
} else {
if (!ctx->rc) {
- grn_table_select_(ctx, table, expr, v, res, op);
+ grn_table_select_sequential(ctx, table, expr, v, res, op);
}
}
}
@@ -5408,7 +5405,7 @@ grn_int32_value_at(grn_obj *obj, int offset)
/* grn_expr_create_from_str */
-#include "snip.h"
+#include "grn_snip.h"
typedef struct {
grn_ctx *ctx;
@@ -5457,239 +5454,6 @@ skip_space(grn_ctx *ctx, efs_info *q)
}
}
-static grn_rc get_expr(grn_ctx *ctx, efs_info *q, grn_obj *column, grn_operator mode);
-static grn_rc get_token(grn_ctx *ctx, efs_info *q, efs_op *op, grn_obj *column, grn_operator mode);
-
-static grn_rc
-get_phrase(grn_ctx *ctx, efs_info *q, grn_obj *column, int mode, int option)
-{
- const char *start, *s;
- start = s = q->cur;
- GRN_BULK_REWIND(&q->buf);
- while (1) {
- unsigned int len;
- if (s >= q->str_end) {
- q->cur = s;
- break;
- }
- len = grn_charlen(ctx, s, q->str_end);
- if (len == 0) {
- /* invalid string containing malformed multibyte char */
- return GRN_END_OF_DATA;
- } else if (len == 1) {
- if (*s == GRN_QUERY_QUOTER) {
- q->cur = s + 1;
- break;
- } else if (*s == GRN_QUERY_ESCAPE && s + 1 < q->str_end) {
- s++;
- len = grn_charlen(ctx, s, q->str_end);
- }
- }
- GRN_TEXT_PUT(ctx, &q->buf, s, len);
- s += len;
- }
- grn_expr_append_obj(ctx, q->e, q->v, GRN_OP_PUSH, 1);
- grn_expr_append_const(ctx, q->e, column, GRN_OP_PUSH, 1);
- grn_expr_append_op(ctx, q->e, GRN_OP_GET_VALUE, 2);
- grn_expr_append_const(ctx, q->e, &q->buf, GRN_OP_PUSH, 1);
- if (mode == GRN_OP_MATCH || mode == GRN_OP_EXACT) {
- grn_expr_append_op(ctx, q->e, mode, 2);
- } else {
- grn_expr_append_const_int(ctx, q->e, option, GRN_OP_PUSH, 1);
- grn_expr_append_op(ctx, q->e, mode, 3);
- }
- return GRN_SUCCESS;
-}
-
-static grn_rc
-get_geocond(grn_ctx *ctx, efs_info *q, grn_obj *longitude, grn_obj *latitude)
-{
- unsigned int len;
- const char *start = q->cur, *end;
- for (end = q->cur;; ) {
- /* null check and length check */
- if (!(len = grn_charlen(ctx, end, q->str_end))) {
- q->cur = q->str_end;
- break;
- }
- if (grn_isspace(end, ctx->encoding) ||
- *end == GRN_QUERY_PARENR) {
- q->cur = end;
- break;
- }
- }
- {
- const char *tokbuf[8];
- int32_t lng0, lat0, lng1, lat1, lng2, lat2, r;
- int32_t n = grn_str_tok((char *)start, end - start, ',', tokbuf, 8, NULL);
- switch (n) {
- case 3 :
- lng0 = grn_atoi(start, tokbuf[0], NULL);
- lat0 = grn_atoi(tokbuf[0] + 1, tokbuf[1], NULL);
- r = grn_atoi(tokbuf[1] + 1, tokbuf[2], NULL);
- grn_expr_append_obj(ctx, q->e, q->v, GRN_OP_PUSH, 1);
- grn_expr_append_const(ctx, q->e, longitude, GRN_OP_PUSH, 1);
- grn_expr_append_op(ctx, q->e, GRN_OP_GET_VALUE, 2);
- grn_expr_append_obj(ctx, q->e, q->v, GRN_OP_PUSH, 1);
- grn_expr_append_const(ctx, q->e, latitude, GRN_OP_PUSH, 1);
- grn_expr_append_op(ctx, q->e, GRN_OP_GET_VALUE, 2);
- grn_expr_append_const_int(ctx, q->e, lng0, GRN_OP_PUSH, 1);
- grn_expr_append_const_int(ctx, q->e, lat0, GRN_OP_PUSH, 1);
- grn_expr_append_const_int(ctx, q->e, r, GRN_OP_PUSH, 1);
- grn_expr_append_op(ctx, q->e, GRN_OP_GEO_WITHINP5, 5);
- break;
- case 4 :
- lng0 = grn_atoi(start, tokbuf[0], NULL);
- lat0 = grn_atoi(tokbuf[0] + 1, tokbuf[1], NULL);
- lng1 = grn_atoi(tokbuf[1] + 1, tokbuf[2], NULL);
- lat1 = grn_atoi(tokbuf[2] + 1, tokbuf[3], NULL);
- grn_expr_append_obj(ctx, q->e, q->v, GRN_OP_PUSH, 1);
- grn_expr_append_const(ctx, q->e, longitude, GRN_OP_PUSH, 1);
- grn_expr_append_op(ctx, q->e, GRN_OP_GET_VALUE, 2);
- grn_expr_append_obj(ctx, q->e, q->v, GRN_OP_PUSH, 1);
- grn_expr_append_const(ctx, q->e, latitude, GRN_OP_PUSH, 1);
- grn_expr_append_op(ctx, q->e, GRN_OP_GET_VALUE, 2);
- grn_expr_append_const_int(ctx, q->e, lng0, GRN_OP_PUSH, 1);
- grn_expr_append_const_int(ctx, q->e, lat0, GRN_OP_PUSH, 1);
- grn_expr_append_const_int(ctx, q->e, lng1, GRN_OP_PUSH, 1);
- grn_expr_append_const_int(ctx, q->e, lat1, GRN_OP_PUSH, 1);
- grn_expr_append_op(ctx, q->e, GRN_OP_GEO_WITHINP6, 6);
- break;
- case 6 :
- lng0 = grn_atoi(start, tokbuf[0], NULL);
- lat0 = grn_atoi(tokbuf[0] + 1, tokbuf[1], NULL);
- lng1 = grn_atoi(tokbuf[1] + 1, tokbuf[2], NULL);
- lat1 = grn_atoi(tokbuf[2] + 1, tokbuf[3], NULL);
- lng2 = grn_atoi(tokbuf[3] + 1, tokbuf[4], NULL);
- lat2 = grn_atoi(tokbuf[4] + 1, tokbuf[5], NULL);
- grn_expr_append_obj(ctx, q->e, q->v, GRN_OP_PUSH, 1);
- grn_expr_append_const(ctx, q->e, longitude, GRN_OP_PUSH, 1);
- grn_expr_append_op(ctx, q->e, GRN_OP_GET_VALUE, 2);
- grn_expr_append_obj(ctx, q->e, q->v, GRN_OP_PUSH, 1);
- grn_expr_append_const(ctx, q->e, latitude, GRN_OP_PUSH, 1);
- grn_expr_append_op(ctx, q->e, GRN_OP_GET_VALUE, 2);
- grn_expr_append_const_int(ctx, q->e, lng0, GRN_OP_PUSH, 1);
- grn_expr_append_const_int(ctx, q->e, lat0, GRN_OP_PUSH, 1);
- grn_expr_append_const_int(ctx, q->e, lng1, GRN_OP_PUSH, 1);
- grn_expr_append_const_int(ctx, q->e, lat1, GRN_OP_PUSH, 1);
- grn_expr_append_const_int(ctx, q->e, lng2, GRN_OP_PUSH, 1);
- grn_expr_append_const_int(ctx, q->e, lat2, GRN_OP_PUSH, 1);
- grn_expr_append_op(ctx, q->e, GRN_OP_GEO_WITHINP8, 8);
- break;
- default :
- ERR(GRN_INVALID_ARGUMENT, "invalid geocond");
- break;
- }
- }
- return ctx->rc;
-}
-
-static grn_rc
-get_word(grn_ctx *ctx, efs_info *q, grn_obj *column, int mode, int option)
-{
- const char *start = q->cur, *end;
- unsigned int len;
- for (end = q->cur;; ) {
- /* null check and length check */
- if (!(len = grn_charlen(ctx, end, q->str_end))) {
- q->cur = q->str_end;
- break;
- }
- if (grn_isspace(end, ctx->encoding) ||
- *end == GRN_QUERY_PARENR) {
- q->cur = end;
- break;
- }
- if (*end == GRN_QUERY_COLUMN) {
- grn_obj *c = grn_obj_column(ctx, q->table, start, end - start);
- if (c && end + 1 < q->str_end) {
- efs_op op;
- switch (end[1]) {
- case '!' :
- mode = GRN_OP_NOT_EQUAL;
- q->cur = end + 2;
- break;
- case '=' :
- if (q->flags & GRN_EXPR_ALLOW_UPDATE) {
- mode = GRN_OP_ASSIGN;
- q->cur = end + 2;
- } else {
- get_token(ctx, q, &op, c, mode);
- }
- break;
- case '<' :
- if (end + 2 < q->str_end && end[2] == '=') {
- mode = GRN_OP_LESS_EQUAL;
- q->cur = end + 3;
- } else {
- mode = GRN_OP_LESS;
- q->cur = end + 2;
- }
- break;
- case '>' :
- if (end + 2 < q->str_end && end[2] == '=') {
- mode = GRN_OP_GREATER_EQUAL;
- q->cur = end + 3;
- } else {
- mode = GRN_OP_GREATER;
- q->cur = end + 2;
- }
- break;
- case '%' :
- mode = GRN_OP_MATCH;
- q->cur = end + 2;
- break;
- case '@' :
- q->cur = end + 2;
- return get_geocond(ctx, q, column, c);
- break;
- default :
- mode = GRN_OP_EQUAL;
- q->cur = end + 1;
- break;
- }
- return get_token(ctx, q, &op, c, mode);
- } else {
- ERR(GRN_INVALID_ARGUMENT, "column lookup failed");
- return ctx->rc;
- }
- } else if (*end == GRN_QUERY_PREFIX) {
- mode = GRN_OP_PREFIX;
- q->cur = end + 1;
- break;
- }
- end += len;
- }
- if (!column) {
- ERR(GRN_INVALID_ARGUMENT, "column missing");
- return ctx->rc;
- }
- if (mode == GRN_OP_ASSIGN) {
- grn_expr_append_obj(ctx, q->e, q->v, GRN_OP_PUSH, 1);
- grn_expr_append_const(ctx, q->e, column, GRN_OP_PUSH, 1);
- grn_expr_append_const_str(ctx, q->e, start, end - start, GRN_OP_PUSH, 1);
- grn_expr_append_op(ctx, q->e, GRN_OP_ASSIGN, 2);
- } else {
- grn_expr_append_obj(ctx, q->e, q->v, GRN_OP_PUSH, 1);
- grn_expr_append_const(ctx, q->e, column, GRN_OP_PUSH, 1);
- grn_expr_append_op(ctx, q->e, GRN_OP_GET_VALUE, 2);
- grn_expr_append_const_str(ctx, q->e, start, end - start, GRN_OP_PUSH, 1);
- switch (mode) {
- case GRN_OP_NEAR :
- case GRN_OP_NEAR2 :
- case GRN_OP_SIMILAR :
- case GRN_OP_TERM_EXTRACT :
- grn_expr_append_const_int(ctx, q->e, option, GRN_OP_PUSH, 1);
- grn_expr_append_op(ctx, q->e, mode, 3);
- break;
- default :
- grn_expr_append_op(ctx, q->e, mode, 2);
- break;
- }
- }
- return GRN_SUCCESS;
-}
-
static grn_bool
get_op(efs_info *q, efs_op *op, grn_operator *mode, int *option)
{
@@ -5738,90 +5502,6 @@ get_op(efs_info *q, efs_op *op, grn_operator *mode, int *option)
return found;
}
-static grn_rc
-get_token(grn_ctx *ctx, efs_info *q, efs_op *op, grn_obj *column, grn_operator mode)
-{
- int option = 0;
- op->op = q->default_op;
- op->weight = DEFAULT_WEIGHT;
- for (;;) {
- skip_space(ctx, q);
- if (q->cur >= q->str_end) { return GRN_END_OF_DATA; }
- switch (*q->cur) {
- case '\0' :
- return GRN_END_OF_DATA;
- break;
- case GRN_QUERY_PARENR :
- q->cur++;
- return GRN_END_OF_DATA;
- break;
- case GRN_QUERY_QUOTEL :
- q->cur++;
- return get_phrase(ctx, q, column, mode, option);
- break;
- case GRN_QUERY_PREFIX :
- q->cur++;
- get_op(q, op, &mode, &option);
- break;
- case GRN_QUERY_AND :
- q->cur++;
- op->op = GRN_OP_AND;
- break;
- case GRN_QUERY_AND_NOT :
- q->cur++;
- op->op = GRN_OP_AND_NOT;
- break;
- case GRN_QUERY_ADJ_INC :
- q->cur++;
- if (op->weight < 127) { op->weight++; }
- op->op = GRN_OP_ADJUST;
- break;
- case GRN_QUERY_ADJ_DEC :
- q->cur++;
- if (op->weight > -128) { op->weight--; }
- op->op = GRN_OP_ADJUST;
- break;
- case GRN_QUERY_ADJ_NEG :
- q->cur++;
- op->op = GRN_OP_ADJUST;
- op->weight = -1;
- break;
- case GRN_QUERY_PARENL :
- q->cur++;
- return get_expr(ctx, q, column, mode);
- break;
- case 'O' :
- if (q->cur[1] == 'R' && q->cur[2] == ' ') {
- q->cur += 2;
- op->op = GRN_OP_OR;
- break;
- }
- /* fallthru */
- default :
- return get_word(ctx, q, column, mode, option);
- break;
- }
- }
- return GRN_SUCCESS;
-}
-
-static grn_rc
-get_expr(grn_ctx *ctx, efs_info *q, grn_obj *column, grn_operator mode)
-{
- efs_op op;
- grn_rc rc = get_token(ctx, q, &op, column, mode);
- if (rc) { return rc; }
- while (!(rc = get_token(ctx, q, &op, column, mode))) {
- if (op.op == GRN_OP_ADJUST) {
- grn_expr_append_const_int(ctx, q->e, op.weight, GRN_OP_PUSH, 1);
- grn_expr_append_op(ctx, q->e, op.op, 3);
- } else {
- grn_expr_append_op(ctx, q->e, op.op, 2);
- }
- }
- return rc;
-}
-
#define DISABLE_UNUSED_CODE 1
#ifndef DISABLE_UNUSED_CODE
static const char *
@@ -5936,8 +5616,8 @@ section_weight_cb(grn_ctx *ctx, grn_hash *r, const void *rid, int sid, void *arg
}
#endif
-#include "ecmascript.h"
-#include "ecmascript.c"
+#include "grn_ecmascript.h"
+#include "grn_ecmascript.c"
static grn_rc
grn_expr_parser_open(grn_ctx *ctx)
@@ -6072,6 +5752,10 @@ get_word_(grn_ctx *ctx, efs_info *q)
mode = GRN_OP_SUFFIX;
q->cur = end + 2;
break;
+ case '~' :
+ mode = GRN_OP_REGEXP;
+ q->cur = end + 2;
+ break;
default :
mode = GRN_OP_EQUAL;
q->cur = end + 1;
@@ -6090,7 +5774,7 @@ get_word_(grn_ctx *ctx, efs_info *q)
GRN_INT32_PUT(ctx, &q->mode_stack, mode);
return GRN_SUCCESS;
- } else if (GRN_TEXT_LEN(&q->buf) > 1 && *end == GRN_QUERY_PREFIX) {
+ } else if (GRN_TEXT_LEN(&q->buf) > 0 && *end == GRN_QUERY_PREFIX) {
q->cur = end + 1;
GRN_INT32_PUT(ctx, &q->mode_stack, GRN_OP_PREFIX);
break;
@@ -6484,6 +6168,10 @@ parse_script(grn_ctx *ctx, efs_info *q)
PARSE(GRN_EXPR_TOKEN_SUFFIX);
q->cur += 2;
break;
+ case '~' :
+ PARSE(GRN_EXPR_TOKEN_REGEXP);
+ q->cur += 2;
+ break;
default :
PARSE(GRN_EXPR_TOKEN_MATCH);
q->cur++;
@@ -6912,7 +6600,7 @@ grn_expr_parse(grn_ctx *ctx, grn_obj *expr,
/*
grn_obj strbuf;
GRN_TEXT_INIT(&strbuf, 0);
- grn_expr_inspect(ctx, &strbuf, expr);
+ grn_expr_inspect_internal(ctx, &strbuf, expr);
GRN_TEXT_PUTC(ctx, &strbuf, '\0');
GRN_LOG(ctx, GRN_LOG_NOTICE, "query=(%s)", GRN_TEXT_VALUE(&strbuf));
GRN_OBJ_FIN(ctx, &strbuf);
@@ -7156,3 +6844,63 @@ grn_expr_syntax_escape_query(grn_ctx *ctx, const char *query, int query_size,
target_characters, GRN_QUERY_ESCAPE,
escaped_query);
}
+
+/*
+ * grn_expr_dump_plan
+ *
+ * Appends a textual description of how `expr` would be scanned to `buffer`.
+ * When scan_info_build() yields a scan_info list, the list is rendered with
+ * grn_inspect_scan_info_list() and then released. When it yields NULL, the
+ * literal text "sequential search\n" is appended instead.
+ *
+ * Always returns GRN_SUCCESS (through GRN_API_RETURN).
+ */
+grn_rc
+grn_expr_dump_plan(grn_ctx *ctx, grn_obj *expr, grn_obj *buffer)
+{
+  int n_sis;
+  scan_info **sis;
+
+  GRN_API_ENTER;
+  sis = scan_info_build(ctx, expr, &n_sis, GRN_OP_OR, 0);
+  if (!sis) {
+    GRN_TEXT_PUTS(ctx, buffer, "sequential search\n");
+  } else {
+    int nth = 0;
+    grn_inspect_scan_info_list(ctx, buffer, sis, n_sis);
+    /* Release every entry first, then the array that held them. */
+    while (nth < n_sis) {
+      SI_FREE(sis[nth]);
+      nth++;
+    }
+    GRN_FREE(sis);
+  }
+  GRN_API_RETURN(GRN_SUCCESS);
+}
+
+/*
+ * Fallback estimate used by grn_expr_estimate_size(): returns whatever
+ * grn_table_size() reports for `table`. `expr` is accepted for signature
+ * parity but is not consulted here.
+ */
+static unsigned int
+grn_expr_estimate_size_raw(grn_ctx *ctx, grn_obj *expr, grn_obj *table)
+{
+  unsigned int n_records;
+
+  n_records = grn_table_size(ctx, table);
+  return n_records;
+}
+
+/*
+ * grn_expr_estimate_size
+ *
+ * Returns a size estimate for `expr`.
+ *
+ * The target table is resolved with grn_ctx_at() from the domain of the
+ * expression's variable at offset 0. If there is no such variable, or its
+ * domain cannot be resolved, GRN_INVALID_ARGUMENT is reported through ERR
+ * and 0 is returned.
+ *
+ * When built with GRN_WITH_MRUBY and ctx->impl->mrb.state is set, the
+ * estimate comes from grn_mrb_expr_estimate_size(); in every other case it
+ * comes from grn_expr_estimate_size_raw().
+ */
+unsigned int
+grn_expr_estimate_size(grn_ctx *ctx, grn_obj *expr)
+{
+  grn_obj *var = grn_expr_get_var_by_offset(ctx, expr, 0);
+  grn_obj *target;
+  unsigned int estimated;
+
+  /*
+   * Both validation checks run ahead of GRN_API_ENTER, so on failure they
+   * return directly instead of going through GRN_API_RETURN.
+   */
+  if (!var) {
+    ERR(GRN_INVALID_ARGUMENT, "at least one variable must be defined");
+    return 0;
+  }
+
+  target = grn_ctx_at(ctx, var->header.domain);
+  if (!target) {
+    ERR(GRN_INVALID_ARGUMENT,
+        "variable refers unknown domain: <%u>", var->header.domain);
+    return 0;
+  }
+
+  GRN_API_ENTER;
+#ifdef GRN_WITH_MRUBY
+  estimated = ctx->impl->mrb.state
+    ? grn_mrb_expr_estimate_size(ctx, expr, target)
+    : grn_expr_estimate_size_raw(ctx, expr, target);
+#else
+  estimated = grn_expr_estimate_size_raw(ctx, expr, target);
+#endif
+  GRN_API_RETURN(estimated);
+}