summary | refs | log | tree | commit | diff
path: root/storage/mroonga/vendor/groonga/lib/token.h
blob: 868930758ba9b72df569535db54e320b0a8555e8 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
/* -*- c-basic-offset: 2 -*- */
/* Copyright(C) 2009-2014 Brazil

  This library is free software; you can redistribute it and/or
  modify it under the terms of the GNU Lesser General Public
  License version 2.1 as published by the Free Software Foundation.

  This library is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  Lesser General Public License for more details.

  You should have received a copy of the GNU Lesser General Public
  License along with this library; if not, write to the Free Software
  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
*/
#ifndef GRN_TOKEN_H
#define GRN_TOKEN_H

#ifndef GROONGA_IN_H
#include "groonga_in.h"
#endif /* GROONGA_IN_H */

#ifndef GRN_CTX_H
#include "ctx.h"
#endif /* GRN_CTX_H */

#ifndef GRN_DB_H
#include "db.h"
#endif /* GRN_DB_H */

#ifndef GRN_STR_H
#include "str.h"
#endif /* GRN_STR_H */

#include <groonga/tokenizer.h>

#ifdef __cplusplus
extern "C" {
#endif

/*
 * Status codes stored in the `status` member of grn_token_cursor.
 * The numeric values are spelled out explicitly; they match the implicit
 * numbering that starts at GRN_TOKEN_DOING = 0.
 * NOTE(review): the precise meaning of each state is defined by the
 * implementation file, not by this header -- confirm there before relying
 * on the names alone.
 */
typedef enum {
  GRN_TOKEN_DOING     = 0,
  GRN_TOKEN_DONE      = 1,
  GRN_TOKEN_DONE_SKIP = 2,
  GRN_TOKEN_NOT_FOUND = 3
} grn_token_status;

/*
 * A token: a grn_obj holding the token's data together with a status value.
 * NOTE(review): `status` has type grn_tokenizer_status, which is not declared
 * in this header (presumably it comes from <groonga/tokenizer.h>); it is a
 * different type from the grn_token_status enum declared in this header.
 */
struct _grn_token {
  /* Token payload. NOTE(review): presumably the token text -- confirm. */
  grn_obj data;
  /* Tokenizer-reported status for this token. */
  grn_tokenizer_status status;
};

/*
 * Cursor state used by grn_token_cursor_open(), grn_token_cursor_next() and
 * grn_token_cursor_close().
 * NOTE(review): the field descriptions below are inferred from names and
 * types only; this header does not show how the fields are used. Confirm
 * against the implementation before relying on them.
 */
typedef struct {
  /* Table object associated with the cursor (see grn_token_cursor_open()). */
  grn_obj *table;
  /* Presumably the start of the original input bytes -- confirm. */
  const unsigned char *orig;
  /* Presumably the current token / current position in the input -- confirm. */
  const unsigned char *curr;
  /* Presumably the byte length of `orig` -- confirm. */
  uint32_t orig_blen;
  /* Presumably the size of the data at `curr` -- confirm. */
  uint32_t curr_size;
  /* Signed position counter. NOTE(review): presumably the token position. */
  int32_t pos;
  /* Tokenization mode; the type is declared outside this header. */
  grn_token_mode mode;
  /* Progress state of the cursor; one of the grn_token_status values. */
  grn_token_status status;
  /* NOTE(review): looks like a boolean flag stored in a byte -- confirm. */
  uint8_t force_prefix;
  /* Presumably a cached copy of the flags of `table` -- confirm. */
  grn_obj_flags table_flags;
  /* Character encoding used by the cursor. */
  grn_encoding encoding;
  /* Tokenizer object. NOTE(review): may be NULL when none is set -- confirm. */
  grn_obj *tokenizer;
  /* Proc context. NOTE(review): presumably used to invoke `tokenizer`. */
  grn_proc_ctx pctx;
  /* Token filters object. NOTE(review): container shape not visible here. */
  grn_obj *token_filters;
  /* NOTE(review): purpose not determinable from this header. */
  uint32_t variant;
  /* NOTE(review): presumably the normalized input string object -- confirm. */
  grn_obj *nstr;
} grn_token_cursor;

/*
 * Global object pointer, defined in another translation unit.
 * NOTE(review): its role is not visible from this header.
 */
extern grn_obj *grn_token_uvector;

/*
 * Module setup and teardown for the token subsystem; each returns a grn_rc.
 * Neither is marked GRN_API, unlike the cursor functions in this header.
 * NOTE(review): presumably to be called as a pair (init before any use, fin
 * at shutdown) -- confirm against the implementation.
 */
grn_rc grn_token_init(void);
grn_rc grn_token_fin(void);

/*
 * Flag with only bit 0 set. The constant has type long (0x01L).
 * NOTE(review): presumably meant for the `flags` argument of
 * grn_token_cursor_open() -- confirm.
 */
#define GRN_TOKEN_ENABLE_TOKENIZED_DELIMITER (0x01L<<0)

/*
 * Opens a token cursor.
 *
 * ctx      Groonga context.
 * table    Table object for the cursor.
 * str      Input bytes; str_len gives the length, so the input is passed as
 *          a pointer/length pair.
 * mode     Tokenization mode.
 * flags    Option bits. NOTE(review): presumably accepts
 *          GRN_TOKEN_ENABLE_TOKENIZED_DELIMITER -- confirm.
 *
 * Returns a pointer to a grn_token_cursor.
 * NOTE(review): presumably returns NULL on failure and the result must be
 * released with grn_token_cursor_close() -- confirm against the implementation.
 */
GRN_API grn_token_cursor *grn_token_cursor_open(grn_ctx *ctx, grn_obj *table,
                                                const char *str, size_t str_len,
                                                grn_token_mode mode,
                                                unsigned int flags);

/*
 * Advances token_cursor and returns a grn_id.
 * NOTE(review): presumably the ID of the next token in the cursor's table,
 * with progress reflected in token_cursor->status -- confirm.
 */
GRN_API grn_id grn_token_cursor_next(grn_ctx *ctx, grn_token_cursor *token_cursor);
/*
 * Closes token_cursor and returns a grn_rc result code.
 * NOTE(review): presumably frees the cursor, so it must not be used
 * afterwards -- confirm.
 */
GRN_API grn_rc grn_token_cursor_close(grn_ctx *ctx, grn_token_cursor *token_cursor);

/*
 * Tokenizer registration entry points; each takes a context and returns a
 * grn_rc. Neither is marked GRN_API.
 * NOTE(review): presumably these register the MeCab tokenizer and the
 * built-in tokenizers with the database opened in ctx -- confirm against the
 * implementation.
 */
grn_rc grn_db_init_mecab_tokenizer(grn_ctx *ctx);
grn_rc grn_db_init_builtin_tokenizers(grn_ctx *ctx);

#ifdef __cplusplus
}
#endif

#endif /* GRN_TOKEN_H */