summaryrefslogtreecommitdiff
path: root/storage/mroonga/vendor/groonga/lib/expr.c
diff options
context:
space:
mode:
Diffstat (limited to 'storage/mroonga/vendor/groonga/lib/expr.c')
-rw-r--r--storage/mroonga/vendor/groonga/lib/expr.c1137
1 files changed, 423 insertions, 714 deletions
diff --git a/storage/mroonga/vendor/groonga/lib/expr.c b/storage/mroonga/vendor/groonga/lib/expr.c
index ef477eb77ee..f13502a0f14 100644
--- a/storage/mroonga/vendor/groonga/lib/expr.c
+++ b/storage/mroonga/vendor/groonga/lib/expr.c
@@ -19,12 +19,10 @@
#include "grn_db.h"
#include "grn_ctx_impl.h"
#include <string.h>
-#include <float.h>
#include "grn_ii.h"
#include "grn_geo.h"
#include "grn_expr.h"
#include "grn_util.h"
-#include "grn_normalizer.h"
#include "grn_mrb.h"
#include "mrb/mrb_expr.h"
@@ -357,7 +355,7 @@ grn_expr_open(grn_ctx *ctx, grn_obj_spec *spec, const uint8_t *p, const uint8_t
{
grn_expr *expr = NULL;
if ((expr = GRN_MALLOCN(grn_expr, 1))) {
- int size = 256;
+ int size = GRN_STACK_SIZE;
expr->consts = NULL;
expr->nconsts = 0;
GRN_TEXT_INIT(&expr->name_buf, 0);
@@ -722,6 +720,21 @@ grn_expr_get_var_by_offset(grn_ctx *ctx, grn_obj *expr, unsigned int offset)
DFI_PUT(e, type, domain, code); \
} while (0)
+static void
+grn_expr_append_obj_resolve_const(grn_ctx *ctx,
+ grn_obj *obj,
+ grn_id to_domain)
+{
+ grn_obj dest;
+
+ GRN_OBJ_INIT(&dest, GRN_BULK, 0, to_domain);
+ if (!grn_obj_cast(ctx, obj, &dest, GRN_FALSE)) {
+ grn_obj_reinit(ctx, obj, to_domain, 0);
+ grn_bulk_write(ctx, obj, GRN_BULK_HEAD(&dest), GRN_BULK_VSIZE(&dest));
+ }
+ GRN_OBJ_FIN(ctx, &dest);
+}
+
grn_obj *
grn_expr_append_obj(grn_ctx *ctx, grn_obj *expr, grn_obj *obj, grn_operator op, int nargs)
{
@@ -854,26 +867,14 @@ grn_expr_append_obj(grn_ctx *ctx, grn_obj *expr, grn_obj *obj, grn_operator op,
if (CONSTP(y)) {
/* todo */
} else {
- grn_obj dest;
if (xd != yd) {
- GRN_OBJ_INIT(&dest, GRN_BULK, 0, yd);
- if (!grn_obj_cast(ctx, x, &dest, GRN_FALSE)) {
- grn_obj_reinit(ctx, x, yd, 0);
- grn_bulk_write(ctx, x, GRN_BULK_HEAD(&dest), GRN_BULK_VSIZE(&dest));
- }
- GRN_OBJ_FIN(ctx, &dest);
+ grn_expr_append_obj_resolve_const(ctx, x, yd);
}
}
} else {
if (CONSTP(y)) {
- grn_obj dest;
if (xd != yd) {
- GRN_OBJ_INIT(&dest, GRN_BULK, 0, xd);
- if (!grn_obj_cast(ctx, y, &dest, GRN_FALSE)) {
- grn_obj_reinit(ctx, y, xd, 0);
- grn_bulk_write(ctx, y, GRN_BULK_HEAD(&dest), GRN_BULK_VSIZE(&dest));
- }
- GRN_OBJ_FIN(ctx, &dest);
+ grn_expr_append_obj_resolve_const(ctx, y, xd);
}
}
}
@@ -908,6 +909,7 @@ grn_expr_append_obj(grn_ctx *ctx, grn_obj *expr, grn_obj *obj, grn_operator op,
case GRN_OP_GET_REF :
case GRN_OP_ADJUST :
case GRN_OP_TERM_EXTRACT :
+ case GRN_OP_REGEXP :
PUSH_CODE(e, op, obj, nargs, code);
if (nargs) {
int i = nargs - 1;
@@ -2223,274 +2225,6 @@ grn_proc_call(grn_ctx *ctx, grn_obj *proc, int nargs, grn_obj *caller)
} \
} while (0)
-static grn_bool
-string_is_contained(grn_ctx *ctx,
- const char *text, unsigned int text_len,
- const char *sub_text, unsigned int sub_text_len)
-{
- /* TODO: Use more fast algorithm such as Boyer-Moore algorithm that
- * is used in snip.c. */
- const char *text_end = text + text_len;
- unsigned int sub_text_current = 0;
-
- for (; text < text_end; text++) {
- if (text[0] == sub_text[sub_text_current]) {
- sub_text_current++;
- if (sub_text_current == sub_text_len) {
- return GRN_TRUE;
- }
- } else {
- sub_text_current = 0;
- }
- }
-
- return GRN_FALSE;
-}
-
-static grn_bool
-pseudo_query_scan_raw_text_raw_text(grn_ctx *ctx,
- const char *x, unsigned int x_len,
- const char *y, unsigned int y_len)
-{
- grn_obj *normalizer;
- grn_obj *norm_x;
- grn_obj *norm_y;
- const char *norm_x_raw;
- const char *norm_y_raw;
- unsigned int norm_x_raw_length_in_bytes;
- unsigned int norm_y_raw_length_in_bytes;
- grn_bool matched = GRN_FALSE;
-
- if (x_len == 0 || y_len == 0) {
- return GRN_FALSE;
- }
-
- normalizer = grn_ctx_get(ctx, GRN_NORMALIZER_AUTO_NAME, -1);
- norm_x = grn_string_open(ctx, x, x_len, normalizer, 0);
- norm_y = grn_string_open(ctx, y, y_len, normalizer, 0);
- grn_string_get_normalized(ctx, norm_x,
- &norm_x_raw, &norm_x_raw_length_in_bytes,
- NULL);
- grn_string_get_normalized(ctx, norm_y,
- &norm_y_raw, &norm_y_raw_length_in_bytes,
- NULL);
- matched = string_is_contained(ctx,
- norm_x_raw, norm_x_raw_length_in_bytes,
- norm_y_raw, norm_y_raw_length_in_bytes);
-
- grn_obj_close(ctx, norm_x);
- grn_obj_close(ctx, norm_y);
- grn_obj_unlink(ctx, normalizer);
-
- return matched;
-}
-
-static grn_bool
-pseudo_query_scan_record_text(grn_ctx *ctx, grn_obj *record, grn_obj *table,
- grn_obj *y)
-{
- grn_obj *normalizer;
- char x_key[GRN_TABLE_MAX_KEY_SIZE];
- int x_key_len;
- grn_bool matched = GRN_FALSE;
-
- if (table->header.domain != GRN_DB_SHORT_TEXT) {
- return GRN_FALSE;
- }
-
- x_key_len = grn_table_get_key(ctx, table, GRN_RECORD_VALUE(record),
- x_key, GRN_TABLE_MAX_KEY_SIZE);
- grn_table_get_info(ctx, table, NULL, NULL, NULL, &normalizer, NULL);
- if (normalizer) {
- grn_obj *norm_y;
- const char *norm_y_raw;
- unsigned int norm_y_raw_length_in_bytes;
- norm_y = grn_string_open(ctx, GRN_TEXT_VALUE(y), GRN_TEXT_LEN(y),
- normalizer, 0);
- grn_string_get_normalized(ctx, norm_y,
- &norm_y_raw, &norm_y_raw_length_in_bytes,
- NULL);
- matched = string_is_contained(ctx,
- x_key, x_key_len,
- norm_y_raw, norm_y_raw_length_in_bytes);
- grn_obj_close(ctx, norm_y);
- } else {
- matched = pseudo_query_scan_raw_text_raw_text(ctx,
- x_key,
- x_key_len,
- GRN_TEXT_VALUE(y),
- GRN_TEXT_LEN(y));
- }
-
- return matched;
-}
-
-static grn_bool
-pseudo_query_scan_text_text(grn_ctx *ctx, grn_obj *x, grn_obj *y)
-{
- return pseudo_query_scan_raw_text_raw_text(ctx,
- GRN_TEXT_VALUE(x),
- GRN_TEXT_LEN(x),
- GRN_TEXT_VALUE(y),
- GRN_TEXT_LEN(y));
-}
-
-static grn_bool
-pseudo_query_scan(grn_ctx *ctx, grn_obj *x, grn_obj *y)
-{
- switch (x->header.domain) {
- case GRN_DB_SHORT_TEXT :
- case GRN_DB_TEXT :
- case GRN_DB_LONG_TEXT :
- switch (y->header.domain) {
- case GRN_DB_SHORT_TEXT :
- case GRN_DB_TEXT :
- case GRN_DB_LONG_TEXT :
- return pseudo_query_scan_text_text(ctx, x, y);
- default :
- break;
- }
- return GRN_FALSE;
- default:
- {
- grn_obj *domain;
- domain = grn_ctx_at(ctx, x->header.domain);
- if (GRN_OBJ_TABLEP(domain)) {
- switch (y->header.domain) {
- case GRN_DB_SHORT_TEXT :
- case GRN_DB_TEXT :
- case GRN_DB_LONG_TEXT :
- return pseudo_query_scan_record_text(ctx, x, domain, y);
- default :
- break;
- }
- }
- }
- return GRN_FALSE;
- }
-}
-
-static grn_bool
-pseudo_prefix_search_match(grn_ctx *ctx,
- const char *x, unsigned int x_len,
- const char *y, unsigned int y_len)
-{
- return (x_len >= y_len && strncmp(x, y, y_len) == 0);
-}
-
-static grn_bool
-pseudo_prefix_search_raw_text_raw_text(grn_ctx *ctx,
- const char *x, unsigned int x_len,
- const char *y, unsigned int y_len)
-{
- grn_obj *normalizer;
- grn_obj *norm_x;
- grn_obj *norm_y;
- const char *norm_x_raw;
- const char *norm_y_raw;
- unsigned int norm_x_raw_len;
- unsigned int norm_y_raw_len;
- grn_bool matched = GRN_FALSE;
-
- normalizer = grn_ctx_get(ctx, GRN_NORMALIZER_AUTO_NAME, -1);
- norm_x = grn_string_open(ctx, x, x_len, normalizer, 0);
- norm_y = grn_string_open(ctx, y, y_len, normalizer, 0);
- grn_string_get_normalized(ctx, norm_x, &norm_x_raw, &norm_x_raw_len, NULL);
- grn_string_get_normalized(ctx, norm_y, &norm_y_raw, &norm_y_raw_len, NULL);
- matched = pseudo_prefix_search_match(ctx,
- norm_x_raw, norm_x_raw_len,
- norm_y_raw, norm_y_raw_len);
-
- grn_obj_close(ctx, norm_x);
- grn_obj_close(ctx, norm_y);
- grn_obj_unlink(ctx, normalizer);
-
- return matched;
-}
-
-static grn_bool
-pseudo_prefix_search_record_text(grn_ctx *ctx, grn_obj *record, grn_obj *table,
- grn_obj *y)
-{
- grn_obj *normalizer;
- char x_key[GRN_TABLE_MAX_KEY_SIZE];
- int x_key_len;
- grn_bool matched = GRN_FALSE;
-
- if (table->header.domain != GRN_DB_SHORT_TEXT) {
- return GRN_FALSE;
- }
-
- x_key_len = grn_table_get_key(ctx, table, GRN_RECORD_VALUE(record),
- x_key, GRN_TABLE_MAX_KEY_SIZE);
- grn_table_get_info(ctx, table, NULL, NULL, NULL, &normalizer, NULL);
- if (normalizer) {
- grn_obj *norm_y;
- const char *norm_y_raw;
- unsigned int norm_y_raw_len;
- norm_y = grn_string_open(ctx, GRN_TEXT_VALUE(y), GRN_TEXT_LEN(y),
- normalizer, 0);
- grn_string_get_normalized(ctx, norm_y, &norm_y_raw, &norm_y_raw_len, NULL);
- matched = pseudo_prefix_search_match(ctx,
- x_key, x_key_len,
- norm_y_raw, norm_y_raw_len);
- grn_obj_close(ctx, norm_y);
- } else {
- matched = pseudo_prefix_search_raw_text_raw_text(ctx,
- x_key,
- x_key_len,
- GRN_TEXT_VALUE(y),
- GRN_TEXT_LEN(y));
- }
-
- return matched;
-}
-
-static grn_bool
-pseudo_prefix_search_text_text(grn_ctx *ctx, grn_obj *x, grn_obj *y)
-{
- return pseudo_prefix_search_raw_text_raw_text(ctx,
- GRN_TEXT_VALUE(x),
- GRN_TEXT_LEN(x),
- GRN_TEXT_VALUE(y),
- GRN_TEXT_LEN(y));
-}
-
-static grn_bool
-pseudo_prefix_search(grn_ctx *ctx, grn_obj *x, grn_obj *y)
-{
- switch (x->header.domain) {
- case GRN_DB_SHORT_TEXT :
- case GRN_DB_TEXT :
- case GRN_DB_LONG_TEXT :
- switch (y->header.domain) {
- case GRN_DB_SHORT_TEXT :
- case GRN_DB_TEXT :
- case GRN_DB_LONG_TEXT :
- return pseudo_prefix_search_text_text(ctx, x, y);
- default :
- break;
- }
- return GRN_FALSE;
- default:
- {
- grn_obj *domain;
- domain = grn_ctx_at(ctx, x->header.domain);
- if (GRN_OBJ_TABLEP(domain)) {
- switch (y->header.domain) {
- case GRN_DB_SHORT_TEXT :
- case GRN_DB_TEXT :
- case GRN_DB_LONG_TEXT :
- return pseudo_prefix_search_record_text(ctx, x, domain, y);
- default :
- break;
- }
- }
- }
- return GRN_FALSE;
- }
-}
-
inline static void
grn_expr_exec_get_member(grn_ctx *ctx,
grn_obj *expr,
@@ -2534,12 +2268,45 @@ grn_expr_exec_get_member(grn_ctx *ctx,
GRN_OBJ_FIN(ctx, &values);
}
+static inline grn_bool
+grn_expr_exec_is_simple_expr(grn_ctx *ctx, grn_obj *expr)
+{
+ grn_expr *e = (grn_expr *)expr;
+
+ if (expr->header.type != GRN_EXPR) {
+ return GRN_FALSE;
+ }
+
+ if (e->codes_curr != 1) {
+ return GRN_FALSE;
+ }
+
+ switch (e->codes[0].op) {
+ case GRN_OP_PUSH :
+ return GRN_TRUE;
+ default :
+ return GRN_FALSE;
+ }
+}
+
+static inline grn_obj *
+grn_expr_exec_simple(grn_ctx *ctx, grn_obj *expr)
+{
+ grn_expr *e = (grn_expr *)expr;
+
+ return e->codes[0].value;
+}
+
grn_obj *
grn_expr_exec(grn_ctx *ctx, grn_obj *expr, int nargs)
{
grn_obj *val = NULL;
uint32_t stack_curr = ctx->impl->stack_curr;
GRN_API_ENTER;
+ if (grn_expr_exec_is_simple_expr(ctx, expr)) {
+ val = grn_expr_exec_simple(ctx, expr);
+ GRN_API_RETURN(val);
+ }
if (expr->header.type == GRN_PROC) {
grn_proc *proc = (grn_proc *)expr;
if (proc->type == GRN_PROC_COMMAND) {
@@ -3170,7 +2937,7 @@ grn_expr_exec(grn_ctx *ctx, grn_obj *expr, int nargs)
POP1(y);
POP1(x);
WITH_SPSAVE({
- matched = pseudo_query_scan(ctx, x, y);
+ matched = grn_operator_exec_match(ctx, x, y);
});
ALLOC1(res);
grn_obj_reinit(ctx, res, GRN_DB_INT32, 0);
@@ -3207,7 +2974,7 @@ grn_expr_exec(grn_ctx *ctx, grn_obj *expr, int nargs)
POP1(y);
POP1(x);
WITH_SPSAVE({
- matched = pseudo_prefix_search(ctx, x, y);
+ matched = grn_operator_exec_prefix(ctx, x, y);
});
ALLOC1(res);
grn_obj_reinit(ctx, res, GRN_DB_INT32, 0);
@@ -3697,6 +3464,21 @@ grn_expr_exec(grn_ctx *ctx, grn_obj *expr, int nargs)
code++;
}
break;
+ case GRN_OP_REGEXP :
+ {
+ grn_obj *target, *pattern;
+ grn_bool matched;
+ POP1(pattern);
+ POP1(target);
+ WITH_SPSAVE({
+ matched = grn_operator_exec_regexp(ctx, target, pattern);
+ });
+ ALLOC1(res);
+ grn_obj_reinit(ctx, res, GRN_DB_BOOL, 0);
+ GRN_BOOL_SET(ctx, res, matched);
+ }
+ code++;
+ break;
default :
ERR(GRN_FUNCTION_NOT_IMPLEMENTED, "not implemented operator assigned");
goto exit;
@@ -3753,12 +3535,17 @@ struct _grn_scan_info {
grn_obj *args[GRN_SCAN_INFO_MAX_N_ARGS];
int max_interval;
int similarity_threshold;
- grn_obj *scorer;
+ grn_obj scorers;
+ grn_obj scorer_args_exprs;
+ grn_obj scorer_args_expr_offsets;
};
#define SI_FREE(si) do {\
GRN_OBJ_FIN(ctx, &(si)->wv);\
GRN_OBJ_FIN(ctx, &(si)->index);\
+ GRN_OBJ_FIN(ctx, &(si)->scorers);\
+ GRN_OBJ_FIN(ctx, &(si)->scorer_args_exprs);\
+ GRN_OBJ_FIN(ctx, &(si)->scorer_args_expr_offsets);\
GRN_FREE(si);\
} while (0)
@@ -3777,7 +3564,9 @@ struct _grn_scan_info {
(si)->max_interval = DEFAULT_MAX_INTERVAL;\
(si)->similarity_threshold = DEFAULT_SIMILARITY_THRESHOLD;\
(si)->start = (st);\
- (si)->scorer = NULL;\
+ GRN_PTR_INIT(&(si)->scorers, GRN_OBJ_VECTOR, GRN_ID_NIL);\
+ GRN_PTR_INIT(&(si)->scorer_args_exprs, GRN_OBJ_VECTOR, GRN_ID_NIL);\
+ GRN_UINT32_INIT(&(si)->scorer_args_expr_offsets, GRN_OBJ_VECTOR);\
} while (0)
static scan_info **
@@ -3821,9 +3610,9 @@ put_logical_op(grn_ctx *ctx, scan_info **sis, int *ip, grn_operator op, int star
} else {
s_->flags &= ~SCAN_PUSH;
s_->logical_op = op;
- memcpy(&sis[i], &sis[j], sizeof(scan_info *) * (r - j));
- memmove(&sis[j], &sis[r], sizeof(scan_info *) * (i - r));
- memcpy(&sis[i + j - r], &sis[i], sizeof(scan_info *) * (r - j));
+ grn_memcpy(&sis[i], &sis[j], sizeof(scan_info *) * (r - j));
+ grn_memmove(&sis[j], &sis[r], sizeof(scan_info *) * (i - r));
+ grn_memcpy(&sis[i + j - r], &sis[i], sizeof(scan_info *) * (r - j));
}
break;
}
@@ -4003,11 +3792,18 @@ grn_expr_inspect_internal(grn_ctx *ctx, grn_obj *buf, grn_obj *expr)
static void
-scan_info_put_index(grn_ctx *ctx, scan_info *si, grn_obj *index, uint32_t sid, int32_t weight)
+scan_info_put_index(grn_ctx *ctx, scan_info *si,
+ grn_obj *index, uint32_t sid, int32_t weight,
+ grn_obj *scorer,
+ grn_obj *scorer_args_expr,
+ uint32_t scorer_args_expr_offset)
{
GRN_PTR_PUT(ctx, &si->index, index);
GRN_UINT32_PUT(ctx, &si->wv, sid);
GRN_INT32_PUT(ctx, &si->wv, weight);
+ GRN_PTR_PUT(ctx, &si->scorers, scorer);
+ GRN_PTR_PUT(ctx, &si->scorer_args_exprs, scorer_args_expr);
+ GRN_UINT32_PUT(ctx, &si->scorer_args_expr_offsets, scorer_args_expr_offset);
{
int i, ni = (GRN_BULK_VSIZE(&si->index) / sizeof(grn_obj *)) - 1;
grn_obj **pi = &GRN_PTR_VALUE_AT(&si->index, ni);
@@ -4015,10 +3811,10 @@ scan_info_put_index(grn_ctx *ctx, scan_info *si, grn_obj *index, uint32_t sid, i
if (index == pi[-1]) {
if (i) {
int32_t *pw = &GRN_INT32_VALUE_AT(&si->wv, (ni - i) * 2);
- memmove(pw + 2, pw, sizeof(int32_t) * 2 * i);
+ grn_memmove(pw + 2, pw, sizeof(int32_t) * 2 * i);
pw[0] = (int32_t) sid;
pw[1] = weight;
- memmove(pi + 1, pi, sizeof(grn_obj *) * i);
+ grn_memmove(pi + 1, pi, sizeof(grn_obj *) * i);
pi[0] = index;
}
return;
@@ -4028,10 +3824,13 @@ scan_info_put_index(grn_ctx *ctx, scan_info *si, grn_obj *index, uint32_t sid, i
}
static int32_t
-get_weight(grn_ctx *ctx, grn_expr_code *ec)
+get_weight(grn_ctx *ctx, grn_expr_code *ec, uint32_t *offset)
{
if (ec->modify == 2 && ec[2].op == GRN_OP_STAR &&
ec[1].value && ec[1].value->header.type == GRN_BULK) {
+ if (offset) {
+ *offset = 2;
+ }
if (ec[1].value->header.domain == GRN_DB_INT32 ||
ec[1].value->header.domain == GRN_DB_UINT32) {
return GRN_INT32_VALUE(ec[1].value);
@@ -4046,6 +3845,9 @@ get_weight(grn_ctx *ctx, grn_expr_code *ec)
return weight;
}
} else {
+ if (offset) {
+ *offset = 0;
+ }
return 1;
}
}
@@ -4067,7 +3869,9 @@ grn_scan_info_open(grn_ctx *ctx, int start)
si->max_interval = DEFAULT_MAX_INTERVAL;
si->similarity_threshold = DEFAULT_SIMILARITY_THRESHOLD;
si->start = start;
- si->scorer = NULL;
+ GRN_PTR_INIT(&si->scorers, GRN_OBJ_VECTOR, GRN_ID_NIL);
+ GRN_PTR_INIT(&si->scorer_args_exprs, GRN_OBJ_VECTOR, GRN_ID_NIL);
+ GRN_UINT32_INIT(&si->scorer_args_expr_offsets, GRN_OBJ_VECTOR);
return si;
}
@@ -4079,9 +3883,16 @@ grn_scan_info_close(grn_ctx *ctx, scan_info *si)
}
void
-grn_scan_info_put_index(grn_ctx *ctx, scan_info *si, grn_obj *index, uint32_t sid, int32_t weight)
+grn_scan_info_put_index(grn_ctx *ctx, scan_info *si,
+ grn_obj *index, uint32_t sid, int32_t weight,
+ grn_obj *scorer,
+ grn_obj *scorer_args_expr,
+ uint32_t scorer_args_expr_offset)
{
- scan_info_put_index(ctx, si, index, sid, weight);
+ scan_info_put_index(ctx, si, index, sid, weight,
+ scorer,
+ scorer_args_expr,
+ scorer_args_expr_offset);
}
scan_info **
@@ -4092,9 +3903,9 @@ grn_scan_info_put_logical_op(grn_ctx *ctx, scan_info **sis, int *ip,
}
int32_t
-grn_expr_code_get_weight(grn_ctx *ctx, grn_expr_code *ec)
+grn_expr_code_get_weight(grn_ctx *ctx, grn_expr_code *ec, uint32_t *offset)
{
- return get_weight(ctx, ec);
+ return get_weight(ctx, ec, offset);
}
int
@@ -4169,18 +3980,6 @@ grn_scan_info_set_similarity_threshold(scan_info *si, int similarity_threshold)
si->similarity_threshold = similarity_threshold;
}
-grn_obj *
-grn_scan_info_get_scorer(scan_info *si)
-{
- return si->scorer;
-}
-
-void
-grn_scan_info_set_scorer(scan_info *si, grn_obj *scorer)
-{
- si->scorer = scorer;
-}
-
grn_bool
grn_scan_info_push_arg(scan_info *si, grn_obj *arg)
{
@@ -4202,32 +4001,64 @@ grn_scan_info_get_arg(grn_ctx *ctx, scan_info *si, int i)
}
static uint32_t
-scan_info_build_find_index_column_index(grn_ctx *ctx,
- scan_info *si,
- grn_expr_code *ec,
- uint32_t n_rest_codes,
- grn_operator op)
+scan_info_build_match_expr_codes_find_index(grn_ctx *ctx, scan_info *si,
+ grn_expr *expr, uint32_t i,
+ grn_obj **index,
+ int *sid)
{
- uint32_t offset = 0;
- grn_obj *index;
- int sid = 0;
- int32_t weight;
-
- index = ec->value;
- if (n_rest_codes >= 2 &&
- ec[1].value &&
- (ec[1].value->header.domain == GRN_DB_INT32 ||
- ec[1].value->header.domain == GRN_DB_UINT32) &&
- ec[2].op == GRN_OP_GET_MEMBER) {
- if (ec[1].value->header.domain == GRN_DB_INT32) {
- sid = GRN_INT32_VALUE(ec[1].value) + 1;
- } else {
- sid = GRN_UINT32_VALUE(ec[1].value) + 1;
+ grn_expr_code *ec;
+ uint32_t offset = 1;
+ grn_index_datum index_datum;
+ unsigned int n_index_data = 0;
+
+ ec = &(expr->codes[i]);
+ switch (ec->value->header.type) {
+ case GRN_ACCESSOR :
+ n_index_data = grn_column_find_index_data(ctx, ec->value, si->op,
+ &index_datum, 1);
+ if (n_index_data > 0) {
+ grn_accessor *a = (grn_accessor *)(ec->value);
+ *sid = index_datum.section;
+ if (a->next && a->obj != index_datum.index) {
+ *index = ec->value;
+ } else {
+ *index = index_datum.index;
+ }
+ }
+ break;
+ case GRN_COLUMN_FIX_SIZE :
+ case GRN_COLUMN_VAR_SIZE :
+ n_index_data = grn_column_find_index_data(ctx, ec->value, si->op,
+ &index_datum, 1);
+ if (n_index_data > 0) {
+ *index = index_datum.index;
+ *sid = index_datum.section;
+ }
+ break;
+ case GRN_COLUMN_INDEX :
+ {
+ uint32_t n_rest_codes;
+
+ *index = ec->value;
+
+ n_rest_codes = expr->codes_curr - i;
+ if (n_rest_codes >= 2 &&
+ ec[1].value &&
+ (ec[1].value->header.domain == GRN_DB_INT32 ||
+ ec[1].value->header.domain == GRN_DB_UINT32) &&
+ ec[2].op == GRN_OP_GET_MEMBER) {
+ if (ec[1].value->header.domain == GRN_DB_INT32) {
+ *sid = GRN_INT32_VALUE(ec[1].value) + 1;
+ } else {
+ *sid = GRN_UINT32_VALUE(ec[1].value) + 1;
+ }
+ offset += 2;
+ }
}
- offset = 2;
+ break;
+ default :
+ break;
}
- weight = get_weight(ctx, ec + offset);
- scan_info_put_index(ctx, si, index, sid, weight);
return offset;
}
@@ -4237,8 +4068,9 @@ scan_info_build_match_expr_codes(grn_ctx *ctx, scan_info *si,
grn_expr *expr, uint32_t i)
{
grn_expr_code *ec;
- grn_obj *index;
- int sid;
+ grn_obj *index = NULL;
+ int sid = 0;
+ uint32_t offset = 0;
ec = &(expr->codes[i]);
if (!ec->value) {
@@ -4247,29 +4079,19 @@ scan_info_build_match_expr_codes(grn_ctx *ctx, scan_info *si,
switch (ec->value->header.type) {
case GRN_ACCESSOR :
- if (grn_column_index(ctx, ec->value, si->op, &index, 1, &sid)) {
- int32_t weight = get_weight(ctx, ec);
- si->flags |= SCAN_ACCESSOR;
- if (((grn_accessor *)ec->value)->next) {
- scan_info_put_index(ctx, si, ec->value, sid, weight);
- } else {
- scan_info_put_index(ctx, si, index, sid, weight);
- }
- }
- break;
case GRN_COLUMN_FIX_SIZE :
case GRN_COLUMN_VAR_SIZE :
- if (grn_column_index(ctx, ec->value, si->op, &index, 1, &sid)) {
- scan_info_put_index(ctx, si, index, sid, get_weight(ctx, ec));
- }
- break;
case GRN_COLUMN_INDEX :
- {
- uint32_t n_rest_codes;
- uint32_t offset;
- n_rest_codes = expr->codes_curr - i;
- offset = scan_info_build_find_index_column_index(ctx, si, ec,
- n_rest_codes, si->op);
+ offset = scan_info_build_match_expr_codes_find_index(ctx, si, expr, i,
+ &index, &sid);
+ i += offset - 1;
+ if (index) {
+ if (ec->value->header.type == GRN_ACCESSOR) {
+ si->flags |= SCAN_ACCESSOR;
+ }
+ scan_info_put_index(ctx, si, index, sid,
+ get_weight(ctx, &(expr->codes[i]), &offset),
+ NULL, NULL, 0);
i += offset;
}
break;
@@ -4285,24 +4107,27 @@ scan_info_build_match_expr_codes(grn_ctx *ctx, scan_info *si,
GRN_OBJ_FIN(ctx, &inspected);
return expr->codes_curr;
}
- si->scorer = ec->value;
- i = scan_info_build_match_expr_codes(ctx, si, expr, i + 1);
- if (expr->codes[i].op != GRN_OP_CALL) {
- grn_obj inspected;
- GRN_TEXT_INIT(&inspected, 0);
- grn_inspect(ctx, &inspected, si->scorer);
- ERR(GRN_INVALID_ARGUMENT,
- "scorer must have only one argument: <%.*s>",
- (int)GRN_TEXT_LEN(&inspected),
- GRN_TEXT_VALUE(&inspected));
- GRN_OBJ_FIN(ctx, &inspected);
- return expr->codes_curr;
+ i++;
+ offset = scan_info_build_match_expr_codes_find_index(ctx, si, expr, i,
+ &index, &sid);
+ i += offset;
+ if (index) {
+ uint32_t scorer_args_expr_offset = 0;
+ if (expr->codes[i].op != GRN_OP_CALL) {
+ scorer_args_expr_offset = i;
+ }
+ while (i < expr->codes_curr && expr->codes[i].op != GRN_OP_CALL) {
+ i++;
+ }
+ scan_info_put_index(ctx, si, index, sid,
+ get_weight(ctx, &(expr->codes[i]), &offset),
+ ec->value,
+ (grn_obj *)expr,
+ scorer_args_expr_offset);
+ i += offset;
}
break;
- case GRN_TABLE_NO_KEY :
- case GRN_TABLE_HASH_KEY :
- case GRN_TABLE_PAT_KEY :
- case GRN_TABLE_DAT_KEY :
+ default :
{
char name[GRN_TABLE_MAX_KEY_SIZE];
int name_size;
@@ -4328,26 +4153,144 @@ scan_info_build_match_expr(grn_ctx *ctx, scan_info *si, grn_expr *expr)
}
}
+static grn_bool
+is_index_searchable_regexp(grn_ctx *ctx, grn_obj *regexp)
+{
+ const char *regexp_raw;
+ const char *regexp_raw_end;
+ grn_bool escaping = GRN_FALSE;
+
+ if (!(regexp->header.domain == GRN_DB_SHORT_TEXT ||
+ regexp->header.domain == GRN_DB_TEXT ||
+ regexp->header.domain == GRN_DB_LONG_TEXT)) {
+ return GRN_FALSE;
+ }
+
+ regexp_raw = GRN_TEXT_VALUE(regexp);
+ regexp_raw_end = regexp_raw + GRN_TEXT_LEN(regexp);
+
+ while (regexp_raw < regexp_raw_end) {
+ unsigned int char_len;
+
+ char_len = grn_charlen(ctx, regexp_raw, regexp_raw_end);
+ if (char_len == 0) {
+ return GRN_FALSE;
+ }
+
+ if (char_len == 1) {
+ if (escaping) {
+ escaping = GRN_FALSE;
+ switch (regexp_raw[0]) {
+ case 'Z' :
+ case 'b' :
+ case 'B' :
+ case 'd' :
+ case 'D' :
+ case 'h' :
+ case 'H' :
+ case 'p' :
+ case 's' :
+ case 'S' :
+ case 'w' :
+ case 'W' :
+ case 'X' :
+ case 'k' :
+ case 'g' :
+ case '1' :
+ case '2' :
+ case '3' :
+ case '4' :
+ case '5' :
+ case '6' :
+ case '7' :
+ case '8' :
+ case '9' :
+ return GRN_FALSE;
+ default :
+ break;
+ }
+ } else {
+ switch (regexp_raw[0]) {
+ case '.' :
+ case '[' :
+ case ']' :
+ case '|' :
+ case '?' :
+ case '+' :
+ case '*' :
+ case '{' :
+ case '}' :
+ case '^' :
+ case '$' :
+ case '(' :
+ case ')' :
+ escaping = GRN_FALSE;
+ return GRN_FALSE;
+ case '\\' :
+ escaping = GRN_TRUE;
+ break;
+ default :
+ escaping = GRN_FALSE;
+ break;
+ }
+ }
+ } else {
+ escaping = GRN_FALSE;
+ }
+
+ regexp_raw += char_len;
+ }
+
+ return GRN_TRUE;
+}
+
static void
scan_info_build_match(grn_ctx *ctx, scan_info *si)
{
- int sid;
- grn_obj *index, **p = si->args, **pe = si->args + si->nargs;
+ grn_obj **p, **pe;
+
+ if (si->op == GRN_OP_REGEXP) {
+ p = si->args;
+ pe = si->args + si->nargs;
+ for (; p < pe; p++) {
+ if ((*p)->header.type == GRN_BULK &&
+ !is_index_searchable_regexp(ctx, *p)) {
+ return;
+ }
+ }
+ }
+
+ p = si->args;
+ pe = si->args + si->nargs;
for (; p < pe; p++) {
if ((*p)->header.type == GRN_EXPR) {
scan_info_build_match_expr(ctx, si, (grn_expr *)(*p));
} else if (GRN_DB_OBJP(*p)) {
- if (grn_column_index(ctx, *p, si->op, &index, 1, &sid)) {
- scan_info_put_index(ctx, si, index, sid, 1);
+ grn_index_datum index_datum;
+ unsigned int n_index_data;
+ n_index_data = grn_column_find_index_data(ctx, *p, si->op,
+ &index_datum, 1);
+ if (n_index_data > 0) {
+ scan_info_put_index(ctx, si,
+ index_datum.index, index_datum.section, 1,
+ NULL, NULL, 0);
}
} else if (GRN_ACCESSORP(*p)) {
+ grn_index_datum index_datum;
+ unsigned int n_index_data;
si->flags |= SCAN_ACCESSOR;
- if (grn_column_index(ctx, *p, si->op, &index, 1, &sid)) {
+ n_index_data = grn_column_find_index_data(ctx, *p, si->op,
+ &index_datum, 1);
+ if (n_index_data > 0) {
+ grn_obj *index;
if (((grn_accessor *)(*p))->next) {
- scan_info_put_index(ctx, si, *p, sid, 1);
+ index = *p;
} else {
- scan_info_put_index(ctx, si, index, sid, 1);
+ index = index_datum.index;
}
+ scan_info_put_index(ctx, si,
+ index, index_datum.section, 1,
+ NULL, NULL, 0);
}
} else {
switch (si->op) {
@@ -4412,6 +4355,7 @@ scan_info_build(grn_ctx *ctx, grn_obj *expr, int *n,
case GRN_OP_GEO_WITHINP6 :
case GRN_OP_GEO_WITHINP8 :
case GRN_OP_TERM_EXTRACT :
+ case GRN_OP_REGEXP :
if (stat < SCAN_COL1 || SCAN_CONST < stat) { return NULL; }
stat = SCAN_START;
m++;
@@ -4492,6 +4436,7 @@ scan_info_build(grn_ctx *ctx, grn_obj *expr, int *n,
case GRN_OP_GEO_WITHINP6 :
case GRN_OP_GEO_WITHINP8 :
case GRN_OP_TERM_EXTRACT :
+ case GRN_OP_REGEXP :
stat = SCAN_START;
si->op = c->op;
si->end = c - e->codes;
@@ -4572,17 +4517,28 @@ scan_info_build(grn_ctx *ctx, grn_obj *expr, int *n,
sis[i++] = si;
/* better index resolving framework for functions should be implemented */
{
- int sid;
- grn_obj *index, **p = si->args, **pe = si->args + si->nargs;
+ grn_obj **p = si->args, **pe = si->args + si->nargs;
for (; p < pe; p++) {
if (GRN_DB_OBJP(*p)) {
- if (grn_column_index(ctx, *p, c->op, &index, 1, &sid)) {
- scan_info_put_index(ctx, si, index, sid, 1);
+ grn_index_datum index_datum;
+ unsigned int n_index_data;
+ n_index_data = grn_column_find_index_data(ctx, *p, c->op,
+ &index_datum, 1);
+ if (n_index_data > 0) {
+ scan_info_put_index(ctx, si,
+ index_datum.index, index_datum.section, 1,
+ NULL, NULL, 0);
}
} else if (GRN_ACCESSORP(*p)) {
+ grn_index_datum index_datum;
+ unsigned int n_index_data;
si->flags |= SCAN_ACCESSOR;
- if (grn_column_index(ctx, *p, c->op, &index, 1, &sid)) {
- scan_info_put_index(ctx, si, index, sid, 1);
+ n_index_data = grn_column_find_index_data(ctx, *p, c->op,
+ &index_datum, 1);
+ if (n_index_data > 0) {
+ scan_info_put_index(ctx, si,
+ index_datum.index, index_datum.section, 1,
+ NULL, NULL, 0);
}
} else {
si->query = *p;
@@ -4924,10 +4880,18 @@ grn_table_select_index_range_accessor(grn_ctx *ctx, grn_obj *table,
accessor = (grn_accessor *)GRN_PTR_VALUE_AT(accessor_stack, i - 1);
target = accessor->obj;
- if (grn_column_index(ctx, target, GRN_OP_EQUAL, &index, 1, &section) == 0) {
- grn_obj_unlink(ctx, current_res);
- current_res = NULL;
- break;
+ {
+ grn_index_datum index_datum;
+ unsigned int n_index_data;
+ n_index_data = grn_column_find_index_data(ctx, target, GRN_OP_EQUAL,
+ &index_datum, 1);
+ if (n_index_data == 0) {
+ grn_obj_unlink(ctx, current_res);
+ current_res = NULL;
+ break;
+ }
+ index = index_datum.index;
+ section = index_datum.section;
}
if (section > 0) {
@@ -5100,7 +5064,10 @@ grn_table_select_index(grn_ctx *ctx, grn_obj *table, scan_info *si,
GRN_BULK_HEAD(si->query),
GRN_BULK_VSIZE(si->query));
}
- grn_ii_at(ctx, (grn_ii *)index, tid, (grn_hash *)res, si->logical_op);
+ if (tid != GRN_ID_NIL) {
+ grn_ii_at(ctx, (grn_ii *)index, tid, (grn_hash *)res,
+ si->logical_op);
+ }
}
grn_ii_resolve_sel_and(ctx, (grn_hash *)res, si->logical_op);
processed = GRN_TRUE;
@@ -5188,9 +5155,11 @@ grn_table_select_index(grn_ctx *ctx, grn_obj *table, scan_info *si,
case GRN_OP_NEAR :
case GRN_OP_NEAR2 :
case GRN_OP_SIMILAR :
+ case GRN_OP_REGEXP :
{
grn_obj wv, **ip = &GRN_PTR_VALUE(&si->index);
- int j = GRN_BULK_VSIZE(&si->index)/sizeof(grn_obj *);
+ int j;
+ int n_indexes = GRN_BULK_VSIZE(&si->index)/sizeof(grn_obj *);
int32_t *wp = &GRN_INT32_VALUE(&si->wv);
grn_search_optarg optarg;
GRN_INT32_INIT(&wv, GRN_OBJ_VECTOR);
@@ -5217,21 +5186,31 @@ grn_table_select_index(grn_ctx *ctx, grn_obj *table, scan_info *si,
optarg.vector_size = 1;
optarg.proc = NULL;
optarg.max_size = 0;
- optarg.scorer = si->scorer;
ctx->flags |= GRN_CTX_TEMPORARY_DISABLE_II_RESOLVE_SEL_AND;
- for (; j--; ip++, wp += 2) {
+ for (j = 0; j < n_indexes; j++, ip++, wp += 2) {
uint32_t sid = (uint32_t) wp[0];
int32_t weight = wp[1];
if (sid) {
int weight_index = sid - 1;
- GRN_INT32_SET_AT(ctx, &wv, weight_index, weight);
+ int current_vector_size;
+ current_vector_size = GRN_BULK_VSIZE(&wv)/sizeof(int32_t);
+ if (weight_index < current_vector_size) {
+ ((int *)GRN_BULK_HEAD(&wv))[weight_index] = weight;
+ } else {
+ GRN_INT32_SET_AT(ctx, &wv, weight_index, weight);
+ }
optarg.weight_vector = &GRN_INT32_VALUE(&wv);
optarg.vector_size = GRN_BULK_VSIZE(&wv)/sizeof(int32_t);
} else {
optarg.weight_vector = NULL;
optarg.vector_size = weight;
}
- if (j) {
+ optarg.scorer = GRN_PTR_VALUE_AT(&(si->scorers), j);
+ optarg.scorer_args_expr =
+ GRN_PTR_VALUE_AT(&(si->scorer_args_exprs), j);
+ optarg.scorer_args_expr_offset =
+ GRN_UINT32_VALUE_AT(&(si->scorer_args_expr_offsets), j);
+ if (j < n_indexes - 1) {
if (sid && ip[0] == ip[1]) { continue; }
} else {
ctx->flags &= ~GRN_CTX_TEMPORARY_DISABLE_II_RESOLVE_SEL_AND;
@@ -5475,239 +5454,6 @@ skip_space(grn_ctx *ctx, efs_info *q)
}
}
-static grn_rc get_expr(grn_ctx *ctx, efs_info *q, grn_obj *column, grn_operator mode);
-static grn_rc get_token(grn_ctx *ctx, efs_info *q, efs_op *op, grn_obj *column, grn_operator mode);
-
-static grn_rc
-get_phrase(grn_ctx *ctx, efs_info *q, grn_obj *column, int mode, int option)
-{
- const char *start, *s;
- start = s = q->cur;
- GRN_BULK_REWIND(&q->buf);
- while (1) {
- unsigned int len;
- if (s >= q->str_end) {
- q->cur = s;
- break;
- }
- len = grn_charlen(ctx, s, q->str_end);
- if (len == 0) {
- /* invalid string containing malformed multibyte char */
- return GRN_END_OF_DATA;
- } else if (len == 1) {
- if (*s == GRN_QUERY_QUOTER) {
- q->cur = s + 1;
- break;
- } else if (*s == GRN_QUERY_ESCAPE && s + 1 < q->str_end) {
- s++;
- len = grn_charlen(ctx, s, q->str_end);
- }
- }
- GRN_TEXT_PUT(ctx, &q->buf, s, len);
- s += len;
- }
- grn_expr_append_obj(ctx, q->e, q->v, GRN_OP_PUSH, 1);
- grn_expr_append_const(ctx, q->e, column, GRN_OP_PUSH, 1);
- grn_expr_append_op(ctx, q->e, GRN_OP_GET_VALUE, 2);
- grn_expr_append_const(ctx, q->e, &q->buf, GRN_OP_PUSH, 1);
- if (mode == GRN_OP_MATCH || mode == GRN_OP_EXACT) {
- grn_expr_append_op(ctx, q->e, mode, 2);
- } else {
- grn_expr_append_const_int(ctx, q->e, option, GRN_OP_PUSH, 1);
- grn_expr_append_op(ctx, q->e, mode, 3);
- }
- return GRN_SUCCESS;
-}
-
-static grn_rc
-get_geocond(grn_ctx *ctx, efs_info *q, grn_obj *longitude, grn_obj *latitude)
-{
- unsigned int len;
- const char *start = q->cur, *end;
- for (end = q->cur;; ) {
- /* null check and length check */
- if (!(len = grn_charlen(ctx, end, q->str_end))) {
- q->cur = q->str_end;
- break;
- }
- if (grn_isspace(end, ctx->encoding) ||
- *end == GRN_QUERY_PARENR) {
- q->cur = end;
- break;
- }
- }
- {
- const char *tokbuf[8];
- int32_t lng0, lat0, lng1, lat1, lng2, lat2, r;
- int32_t n = grn_str_tok((char *)start, end - start, ',', tokbuf, 8, NULL);
- switch (n) {
- case 3 :
- lng0 = grn_atoi(start, tokbuf[0], NULL);
- lat0 = grn_atoi(tokbuf[0] + 1, tokbuf[1], NULL);
- r = grn_atoi(tokbuf[1] + 1, tokbuf[2], NULL);
- grn_expr_append_obj(ctx, q->e, q->v, GRN_OP_PUSH, 1);
- grn_expr_append_const(ctx, q->e, longitude, GRN_OP_PUSH, 1);
- grn_expr_append_op(ctx, q->e, GRN_OP_GET_VALUE, 2);
- grn_expr_append_obj(ctx, q->e, q->v, GRN_OP_PUSH, 1);
- grn_expr_append_const(ctx, q->e, latitude, GRN_OP_PUSH, 1);
- grn_expr_append_op(ctx, q->e, GRN_OP_GET_VALUE, 2);
- grn_expr_append_const_int(ctx, q->e, lng0, GRN_OP_PUSH, 1);
- grn_expr_append_const_int(ctx, q->e, lat0, GRN_OP_PUSH, 1);
- grn_expr_append_const_int(ctx, q->e, r, GRN_OP_PUSH, 1);
- grn_expr_append_op(ctx, q->e, GRN_OP_GEO_WITHINP5, 5);
- break;
- case 4 :
- lng0 = grn_atoi(start, tokbuf[0], NULL);
- lat0 = grn_atoi(tokbuf[0] + 1, tokbuf[1], NULL);
- lng1 = grn_atoi(tokbuf[1] + 1, tokbuf[2], NULL);
- lat1 = grn_atoi(tokbuf[2] + 1, tokbuf[3], NULL);
- grn_expr_append_obj(ctx, q->e, q->v, GRN_OP_PUSH, 1);
- grn_expr_append_const(ctx, q->e, longitude, GRN_OP_PUSH, 1);
- grn_expr_append_op(ctx, q->e, GRN_OP_GET_VALUE, 2);
- grn_expr_append_obj(ctx, q->e, q->v, GRN_OP_PUSH, 1);
- grn_expr_append_const(ctx, q->e, latitude, GRN_OP_PUSH, 1);
- grn_expr_append_op(ctx, q->e, GRN_OP_GET_VALUE, 2);
- grn_expr_append_const_int(ctx, q->e, lng0, GRN_OP_PUSH, 1);
- grn_expr_append_const_int(ctx, q->e, lat0, GRN_OP_PUSH, 1);
- grn_expr_append_const_int(ctx, q->e, lng1, GRN_OP_PUSH, 1);
- grn_expr_append_const_int(ctx, q->e, lat1, GRN_OP_PUSH, 1);
- grn_expr_append_op(ctx, q->e, GRN_OP_GEO_WITHINP6, 6);
- break;
- case 6 :
- lng0 = grn_atoi(start, tokbuf[0], NULL);
- lat0 = grn_atoi(tokbuf[0] + 1, tokbuf[1], NULL);
- lng1 = grn_atoi(tokbuf[1] + 1, tokbuf[2], NULL);
- lat1 = grn_atoi(tokbuf[2] + 1, tokbuf[3], NULL);
- lng2 = grn_atoi(tokbuf[3] + 1, tokbuf[4], NULL);
- lat2 = grn_atoi(tokbuf[4] + 1, tokbuf[5], NULL);
- grn_expr_append_obj(ctx, q->e, q->v, GRN_OP_PUSH, 1);
- grn_expr_append_const(ctx, q->e, longitude, GRN_OP_PUSH, 1);
- grn_expr_append_op(ctx, q->e, GRN_OP_GET_VALUE, 2);
- grn_expr_append_obj(ctx, q->e, q->v, GRN_OP_PUSH, 1);
- grn_expr_append_const(ctx, q->e, latitude, GRN_OP_PUSH, 1);
- grn_expr_append_op(ctx, q->e, GRN_OP_GET_VALUE, 2);
- grn_expr_append_const_int(ctx, q->e, lng0, GRN_OP_PUSH, 1);
- grn_expr_append_const_int(ctx, q->e, lat0, GRN_OP_PUSH, 1);
- grn_expr_append_const_int(ctx, q->e, lng1, GRN_OP_PUSH, 1);
- grn_expr_append_const_int(ctx, q->e, lat1, GRN_OP_PUSH, 1);
- grn_expr_append_const_int(ctx, q->e, lng2, GRN_OP_PUSH, 1);
- grn_expr_append_const_int(ctx, q->e, lat2, GRN_OP_PUSH, 1);
- grn_expr_append_op(ctx, q->e, GRN_OP_GEO_WITHINP8, 8);
- break;
- default :
- ERR(GRN_INVALID_ARGUMENT, "invalid geocond");
- break;
- }
- }
- return ctx->rc;
-}
-
-static grn_rc
-get_word(grn_ctx *ctx, efs_info *q, grn_obj *column, int mode, int option)
-{
- const char *start = q->cur, *end;
- unsigned int len;
- for (end = q->cur;; ) {
- /* null check and length check */
- if (!(len = grn_charlen(ctx, end, q->str_end))) {
- q->cur = q->str_end;
- break;
- }
- if (grn_isspace(end, ctx->encoding) ||
- *end == GRN_QUERY_PARENR) {
- q->cur = end;
- break;
- }
- if (*end == GRN_QUERY_COLUMN) {
- grn_obj *c = grn_obj_column(ctx, q->table, start, end - start);
- if (c && end + 1 < q->str_end) {
- efs_op op;
- switch (end[1]) {
- case '!' :
- mode = GRN_OP_NOT_EQUAL;
- q->cur = end + 2;
- break;
- case '=' :
- if (q->flags & GRN_EXPR_ALLOW_UPDATE) {
- mode = GRN_OP_ASSIGN;
- q->cur = end + 2;
- } else {
- get_token(ctx, q, &op, c, mode);
- }
- break;
- case '<' :
- if (end + 2 < q->str_end && end[2] == '=') {
- mode = GRN_OP_LESS_EQUAL;
- q->cur = end + 3;
- } else {
- mode = GRN_OP_LESS;
- q->cur = end + 2;
- }
- break;
- case '>' :
- if (end + 2 < q->str_end && end[2] == '=') {
- mode = GRN_OP_GREATER_EQUAL;
- q->cur = end + 3;
- } else {
- mode = GRN_OP_GREATER;
- q->cur = end + 2;
- }
- break;
- case '%' :
- mode = GRN_OP_MATCH;
- q->cur = end + 2;
- break;
- case '@' :
- q->cur = end + 2;
- return get_geocond(ctx, q, column, c);
- break;
- default :
- mode = GRN_OP_EQUAL;
- q->cur = end + 1;
- break;
- }
- return get_token(ctx, q, &op, c, mode);
- } else {
- ERR(GRN_INVALID_ARGUMENT, "column lookup failed");
- return ctx->rc;
- }
- } else if (*end == GRN_QUERY_PREFIX) {
- mode = GRN_OP_PREFIX;
- q->cur = end + 1;
- break;
- }
- end += len;
- }
- if (!column) {
- ERR(GRN_INVALID_ARGUMENT, "column missing");
- return ctx->rc;
- }
- if (mode == GRN_OP_ASSIGN) {
- grn_expr_append_obj(ctx, q->e, q->v, GRN_OP_PUSH, 1);
- grn_expr_append_const(ctx, q->e, column, GRN_OP_PUSH, 1);
- grn_expr_append_const_str(ctx, q->e, start, end - start, GRN_OP_PUSH, 1);
- grn_expr_append_op(ctx, q->e, GRN_OP_ASSIGN, 2);
- } else {
- grn_expr_append_obj(ctx, q->e, q->v, GRN_OP_PUSH, 1);
- grn_expr_append_const(ctx, q->e, column, GRN_OP_PUSH, 1);
- grn_expr_append_op(ctx, q->e, GRN_OP_GET_VALUE, 2);
- grn_expr_append_const_str(ctx, q->e, start, end - start, GRN_OP_PUSH, 1);
- switch (mode) {
- case GRN_OP_NEAR :
- case GRN_OP_NEAR2 :
- case GRN_OP_SIMILAR :
- case GRN_OP_TERM_EXTRACT :
- grn_expr_append_const_int(ctx, q->e, option, GRN_OP_PUSH, 1);
- grn_expr_append_op(ctx, q->e, mode, 3);
- break;
- default :
- grn_expr_append_op(ctx, q->e, mode, 2);
- break;
- }
- }
- return GRN_SUCCESS;
-}
-
static grn_bool
get_op(efs_info *q, efs_op *op, grn_operator *mode, int *option)
{
@@ -5756,90 +5502,6 @@ get_op(efs_info *q, efs_op *op, grn_operator *mode, int *option)
return found;
}
-static grn_rc
-get_token(grn_ctx *ctx, efs_info *q, efs_op *op, grn_obj *column, grn_operator mode)
-{
- int option = 0;
- op->op = q->default_op;
- op->weight = DEFAULT_WEIGHT;
- for (;;) {
- skip_space(ctx, q);
- if (q->cur >= q->str_end) { return GRN_END_OF_DATA; }
- switch (*q->cur) {
- case '\0' :
- return GRN_END_OF_DATA;
- break;
- case GRN_QUERY_PARENR :
- q->cur++;
- return GRN_END_OF_DATA;
- break;
- case GRN_QUERY_QUOTEL :
- q->cur++;
- return get_phrase(ctx, q, column, mode, option);
- break;
- case GRN_QUERY_PREFIX :
- q->cur++;
- get_op(q, op, &mode, &option);
- break;
- case GRN_QUERY_AND :
- q->cur++;
- op->op = GRN_OP_AND;
- break;
- case GRN_QUERY_AND_NOT :
- q->cur++;
- op->op = GRN_OP_AND_NOT;
- break;
- case GRN_QUERY_ADJ_INC :
- q->cur++;
- if (op->weight < 127) { op->weight++; }
- op->op = GRN_OP_ADJUST;
- break;
- case GRN_QUERY_ADJ_DEC :
- q->cur++;
- if (op->weight > -128) { op->weight--; }
- op->op = GRN_OP_ADJUST;
- break;
- case GRN_QUERY_ADJ_NEG :
- q->cur++;
- op->op = GRN_OP_ADJUST;
- op->weight = -1;
- break;
- case GRN_QUERY_PARENL :
- q->cur++;
- return get_expr(ctx, q, column, mode);
- break;
- case 'O' :
- if (q->cur[1] == 'R' && q->cur[2] == ' ') {
- q->cur += 2;
- op->op = GRN_OP_OR;
- break;
- }
- /* fallthru */
- default :
- return get_word(ctx, q, column, mode, option);
- break;
- }
- }
- return GRN_SUCCESS;
-}
-
-static grn_rc
-get_expr(grn_ctx *ctx, efs_info *q, grn_obj *column, grn_operator mode)
-{
- efs_op op;
- grn_rc rc = get_token(ctx, q, &op, column, mode);
- if (rc) { return rc; }
- while (!(rc = get_token(ctx, q, &op, column, mode))) {
- if (op.op == GRN_OP_ADJUST) {
- grn_expr_append_const_int(ctx, q->e, op.weight, GRN_OP_PUSH, 1);
- grn_expr_append_op(ctx, q->e, op.op, 3);
- } else {
- grn_expr_append_op(ctx, q->e, op.op, 2);
- }
- }
- return rc;
-}
-
#define DISABLE_UNUSED_CODE 1
#ifndef DISABLE_UNUSED_CODE
static const char *
@@ -6090,6 +5752,10 @@ get_word_(grn_ctx *ctx, efs_info *q)
mode = GRN_OP_SUFFIX;
q->cur = end + 2;
break;
+ case '~' :
+ mode = GRN_OP_REGEXP;
+ q->cur = end + 2;
+ break;
default :
mode = GRN_OP_EQUAL;
q->cur = end + 1;
@@ -6502,6 +6168,10 @@ parse_script(grn_ctx *ctx, efs_info *q)
PARSE(GRN_EXPR_TOKEN_SUFFIX);
q->cur += 2;
break;
+ case '~' :
+ PARSE(GRN_EXPR_TOKEN_REGEXP);
+ q->cur += 2;
+ break;
default :
PARSE(GRN_EXPR_TOKEN_MATCH);
q->cur++;
@@ -7195,3 +6865,42 @@ grn_expr_dump_plan(grn_ctx *ctx, grn_obj *expr, grn_obj *buffer)
}
GRN_API_RETURN(GRN_SUCCESS);
}
+
+static unsigned int
+grn_expr_estimate_size_raw(grn_ctx *ctx, grn_obj *expr, grn_obj *table)
+{
+ return grn_table_size(ctx, table);
+}
+
+unsigned int
+grn_expr_estimate_size(grn_ctx *ctx, grn_obj *expr)
+{
+ grn_obj *table;
+ grn_obj *variable;
+ unsigned int size;
+
+ variable = grn_expr_get_var_by_offset(ctx, expr, 0);
+ if (!variable) {
+ ERR(GRN_INVALID_ARGUMENT, "at least one variable must be defined");
+ return 0;
+ }
+
+ table = grn_ctx_at(ctx, variable->header.domain);
+ if (!table) {
+ ERR(GRN_INVALID_ARGUMENT,
+ "variable refers unknown domain: <%u>", variable->header.domain);
+ return 0;
+ }
+
+ GRN_API_ENTER;
+#ifdef GRN_WITH_MRUBY
+ if (ctx->impl->mrb.state) {
+ size = grn_mrb_expr_estimate_size(ctx, expr, table);
+ } else {
+ size = grn_expr_estimate_size_raw(ctx, expr, table);
+ }
+#else
+ size = grn_expr_estimate_size_raw(ctx, expr, table);
+#endif
+ GRN_API_RETURN(size);
+}