summaryrefslogtreecommitdiff
path: root/storage/mroonga/vendor/groonga/lib/ii.h
blob: 877200fff7ca09d4fd6bfd6c764a01e38f2db5af (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
/* -*- c-basic-offset: 2 -*- */
/* Copyright(C) 2009-2012 Brazil

  This library is free software; you can redistribute it and/or
  modify it under the terms of the GNU Lesser General Public
  License version 2.1 as published by the Free Software Foundation.

  This library is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  Lesser General Public License for more details.

  You should have received a copy of the GNU Lesser General Public
  License along with this library; if not, write to the Free Software
  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
*/
#ifndef GRN_II_H
#define GRN_II_H

/* "ii" is for inverted index */

#ifndef GROONGA_IN_H
#include "groonga_in.h"
#endif /* GROONGA_IN_H */

#ifndef GRN_HASH_H
#include "hash.h"
#endif /* GRN_HASH_H */

#ifndef GRN_IO_H
#include "io.h"
#endif /* GRN_IO_H */

#ifndef GRN_STORE_H
#include "store.h"
#endif /* GRN_STORE_H */

#ifdef  __cplusplus
extern "C" {
#endif

struct _grn_ii {
  grn_db_obj obj;
  grn_io *seg;
  grn_io *chunk;
  grn_obj *lexicon;
  grn_obj_flags lflags;
  grn_encoding encoding;
  uint32_t n_elements;
  struct grn_ii_header *header;
};

#define GRN_II_BGQSIZE 16
#define GRN_II_MAX_LSEG           0x10000
#define GRN_II_W_TOTAL_CHUNK      40
#define GRN_II_W_CHUNK            22
#define GRN_II_W_LEAST_CHUNK      (GRN_II_W_TOTAL_CHUNK - 32)
#define GRN_II_MAX_CHUNK          (1 << (GRN_II_W_TOTAL_CHUNK - GRN_II_W_CHUNK))
#define GRN_II_N_CHUNK_VARIATION  (GRN_II_W_CHUNK - GRN_II_W_LEAST_CHUNK)

struct grn_ii_header {
  uint64_t total_chunk_size;
  uint64_t bmax;
  uint32_t flags;
  uint32_t amax;
  uint32_t smax;
  uint32_t param1;
  uint32_t param2;
  uint32_t pnext;
  uint32_t bgqhead;
  uint32_t bgqtail;
  uint32_t bgqbody[GRN_II_BGQSIZE];
  uint32_t reserved[288];
  uint32_t ainfo[GRN_II_MAX_LSEG];
  uint32_t binfo[GRN_II_MAX_LSEG];
  uint32_t free_chunks[GRN_II_N_CHUNK_VARIATION + 1];
  uint32_t garbages[GRN_II_N_CHUNK_VARIATION + 1];
  uint32_t ngarbages[GRN_II_N_CHUNK_VARIATION + 1];
  uint8_t chunks[GRN_II_MAX_CHUNK >> 3];
};

struct _grn_ii_pos {
  struct _grn_ii_pos *next;
  uint32_t pos;
};

struct _grn_ii_updspec {
  uint32_t rid;
  uint32_t sid;
  int32_t weight;
  int32_t tf;                 /* number of postings successfully stored to index */
  int32_t atf;                /* actual number of postings */
  int32_t offset;
  struct _grn_ii_pos *pos;
  struct _grn_ii_pos *tail;
  /* grn_vgram_vnode *vnodes; */
};

typedef struct _grn_ii_updspec grn_ii_updspec;

GRN_API grn_ii *grn_ii_create(grn_ctx *ctx, const char *path, grn_obj *lexicon,
                              uint32_t flags);
GRN_API grn_ii *grn_ii_open(grn_ctx *ctx, const char *path, grn_obj *lexicon);
GRN_API grn_rc grn_ii_close(grn_ctx *ctx, grn_ii *ii);
GRN_API grn_rc grn_ii_remove(grn_ctx *ctx, const char *path);
grn_rc grn_ii_info(grn_ctx *ctx, grn_ii *ii, uint64_t *seg_size, uint64_t *chunk_size);
grn_rc grn_ii_update_one(grn_ctx *ctx, grn_ii *ii, uint32_t key, grn_ii_updspec *u,
                         grn_hash *h);
grn_rc grn_ii_delete_one(grn_ctx *ctx, grn_ii *ii, uint32_t key, grn_ii_updspec *u,
                         grn_hash *h);
grn_ii_updspec *grn_ii_updspec_open(grn_ctx *ctx, uint32_t rid, uint32_t sid);
grn_rc grn_ii_updspec_close(grn_ctx *ctx, grn_ii_updspec *u);
grn_rc grn_ii_updspec_add(grn_ctx *ctx, grn_ii_updspec *u, int pos, int32_t weight);
int grn_ii_updspec_cmp(grn_ii_updspec *a, grn_ii_updspec *b);

uint32_t grn_ii_estimate_size(grn_ctx *ctx, grn_ii *ii, uint32_t key);

void grn_ii_expire(grn_ctx *ctx, grn_ii *ii);

typedef struct {
  grn_id rid;
  uint32_t sid;
  uint32_t pos;
  uint32_t tf;
  uint32_t weight;
  uint32_t rest;
} grn_ii_posting;

typedef struct _grn_ii_cursor grn_ii_cursor;

GRN_API grn_rc grn_ii_posting_add(grn_ctx *ctx, grn_ii_posting *pos,
                                  grn_hash *s, grn_operator op);

GRN_API grn_ii_cursor *grn_ii_cursor_open(grn_ctx *ctx, grn_ii *ii, grn_id tid,
                                          grn_id min, grn_id max, int nelements, int flags);
grn_ii_cursor *grn_ii_cursor_openv1(grn_ii *ii, uint32_t key);
grn_rc grn_ii_cursor_openv2(grn_ii_cursor **cursors, int ncursors);
GRN_API grn_ii_posting *grn_ii_cursor_next(grn_ctx *ctx, grn_ii_cursor *c);
grn_ii_posting *grn_ii_cursor_next_pos(grn_ctx *ctx, grn_ii_cursor *c);
GRN_API grn_rc grn_ii_cursor_close(grn_ctx *ctx, grn_ii_cursor *c);

uint32_t grn_ii_max_section(grn_ii *ii);

int grn_ii_check(grn_ii *ii);
const char *grn_ii_path(grn_ii *ii);
grn_obj *grn_ii_lexicon(grn_ii *ii);

/*
grn_rc grn_ii_upd(grn_ctx *ctx, grn_ii *ii, grn_id rid, grn_vgram *vgram,
                   const char *oldvalue, unsigned int oldvalue_len,
                   const char *newvalue, unsigned int newvalue_len);
grn_rc grn_ii_update(grn_ctx *ctx, grn_ii *ii, grn_id rid, grn_vgram *vgram,
                      unsigned int section,
                      grn_values *oldvalues, grn_values *newvalues);
*/

typedef struct _grn_select_optarg grn_select_optarg;

struct _grn_select_optarg {
  grn_operator mode;
  int similarity_threshold;
  int max_interval;
  int *weight_vector;
  int vector_size;
  int (*func)(grn_ctx *, grn_hash *, const void *, int, void *);
  void *func_arg;
  int max_size;
};

GRN_API grn_rc grn_ii_column_update(grn_ctx *ctx, grn_ii *ii, grn_id id,
                                    unsigned int section, grn_obj *oldvalue,
                                    grn_obj *newvalue, grn_obj *posting);
grn_rc grn_ii_term_extract(grn_ctx *ctx, grn_ii *ii, const char *string,
                            unsigned int string_len, grn_hash *s,
                            grn_operator op, grn_select_optarg *optarg);
grn_rc grn_ii_similar_search(grn_ctx *ctx, grn_ii *ii, const char *string, unsigned int string_len,
                              grn_hash *s, grn_operator op, grn_select_optarg *optarg);
GRN_API grn_rc grn_ii_select(grn_ctx *ctx, grn_ii *ii, const char *string, unsigned int string_len,
                             grn_hash *s, grn_operator op, grn_select_optarg *optarg);
grn_rc grn_ii_sel(grn_ctx *ctx, grn_ii *ii, const char *string, unsigned int string_len,
                  grn_hash *s, grn_operator op, grn_search_optarg *optarg);

void grn_ii_resolve_sel_and(grn_ctx *ctx, grn_hash *s, grn_operator op);

grn_rc grn_ii_at(grn_ctx *ctx, grn_ii *ii, grn_id id, grn_hash *s, grn_operator op);

void grn_ii_inspect_elements(grn_ctx *ctx, grn_ii *ii, grn_obj *buf);
void grn_ii_cursor_inspect(grn_ctx *ctx, grn_ii_cursor *c, grn_obj *buf);

grn_rc grn_ii_build(grn_ctx *ctx, grn_ii *ii, uint64_t sparsity);

#ifdef __cplusplus
}
#endif

#endif /* GRN_II_H */