summaryrefslogtreecommitdiff
path: root/include/json_lib.h
blob: 610914486328ff934de8524f91f0332c2192a793 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
#ifndef JSON_LIB_INCLUDED
#define JSON_LIB_INCLUDED

#include <my_sys.h>

#ifdef __cplusplus
extern "C" {
#endif

#define JSON_DEPTH_LIMIT 32

/*
  When error happens, the c_next of the JSON engine contains the
  character that caused the error, and the c_str is the position
  in string where the error occurs.
*/
enum json_errors {
  JE_BAD_CHR= -1,      /* Invalid character, charset handler cannot read it. */

  JE_NOT_JSON_CHR= -2, /* Character met not used in JSON. */
                       /* ASCII 00-08 for instance.       */

  JE_EOS= -3,          /* Unexpected end of string. */

  JE_SYN= -4,          /* The next character breaks the JSON syntax. */

  JE_STRING_CONST= -5, /* Character disallowed in string constant. */

  JE_ESCAPING= -6,     /* Error in the escaping. */

  JE_DEPTH= -7,        /* The limit on the JSON depth was overrun. */
};


typedef struct st_json_string_t
{
  const uchar *c_str;    /* Current position in JSON string */
  const uchar *str_end;  /* The end on the string. */
  my_wc_t c_next;        /* UNICODE of the last read character */
  int error;             /* error code. */

  CHARSET_INFO *cs;      /* Character set of the JSON string. */

  my_charset_conv_mb_wc wc; /* UNICODE conversion function. */
                            /* It's taken out of the cs just to speed calls. */
} json_string_t;


void json_string_set_cs(json_string_t *s, CHARSET_INFO *i_cs);
void json_string_set_str(json_string_t *s,
                         const uchar *str, const uchar *end);
#define json_next_char(j) \
  (j)->wc((j)->cs, &(j)->c_next, (j)->c_str, (j)->str_end)
#define json_eos(j) ((j)->c_str >= (j)->str_end)
/*
  read_string_const_chr() reads the next character of the string constant
  and saves it to the js->c_next.
  It takes into account possible escapings, so if for instance
  the string is '\b', the read_string_const_chr() sets 8.
*/
int json_read_string_const_chr(json_string_t *js);


/*
  Various JSON-related operations expect JSON path as a parameter.
  The path is a string like this "$.keyA[2].*"
  The path itself is a number of steps specifying either a key or a position
  in an array. Some of them can be wildcards.
  So the representation of the JSON path is the json_path_t class
  containing an array of json_path_step_t objects.
*/


/* Path step types - actually bitmasks to let '&' or '|' operations. */
enum json_path_step_types
{
  JSON_PATH_KEY_NULL=0,
  JSON_PATH_KEY=1,   /* Must be equal to JSON_VALUE_OBJECT. */
  JSON_PATH_ARRAY=2, /* Must be equal to JSON_VALUE_ARRAY. */
  JSON_PATH_KEY_OR_ARRAY=3,
  JSON_PATH_WILD=4, /* Step like .* or [*] */
  JSON_PATH_DOUBLE_WILD=8, /* Step like **.k or **[1] */
  JSON_PATH_KEY_WILD= 1+4,
  JSON_PATH_KEY_DOUBLEWILD= 1+8,
  JSON_PATH_ARRAY_WILD= 2+4,
  JSON_PATH_ARRAY_DOUBLEWILD= 2+8,
  JSON_PATH_NEGATIVE_INDEX= 16,
  JSON_PATH_ARRAY_RANGE= 32
};


typedef struct st_json_path_step_t
{
  enum json_path_step_types type;  /* The type of the step -   */
                                   /* see json_path_step_types */
  const uchar *key; /* Pointer to the beginning of the key. */
  const uchar *key_end;  /* Pointer to the end of the key. */
  int n_item;  /* Item number in an array. No meaning for the key step. */
  int n_item_end; /* Last index of the range. */
} json_path_step_t;


typedef struct st_json_path_t
{
  json_string_t s;  /* The string to be parsed. */
  json_path_step_t steps[JSON_DEPTH_LIMIT]; /* Steps of the path. */
  json_path_step_t *last_step; /* Points to the last step. */

  int mode_strict; /* TRUE if the path specified as 'strict' */
  enum json_path_step_types types_used; /* The '|' of all step's 'type'-s */
} json_path_t;


int json_path_setup(json_path_t *p,
                    CHARSET_INFO *i_cs, const uchar *str, const uchar *end);


/*
  The set of functions and structures below provides interface
  to the JSON text parser.
  Running the parser normally goes like this:

    json_engine_t j_eng;   // structure keeps parser's data
    json_scan_start(j_eng) // begin the parsing

    do
    {
      // The parser has read next piece of JSON
      // and set fields of j_eng structure accordingly.
      // So let's see what we have:
      switch (j_eng.state)
      {
        case JST_KEY:
           // Handle key name. See the json_read_keyname_chr()
           // Probably compare it with the keyname we're looking for
        case JST_VALUE:
           // Handle value. It is either value of the key or an array item.
           // see the json_read_value()
        case JST_OBJ_START:
          // parser found an object (the '{' in JSON)
        case JST_OBJ_END:
          // parser found the end of the object (the '}' in JSON)
        case JST_ARRAY_START:
          // parser found an array (the '[' in JSON)
        case JST_ARRAY_END:
          // parser found the end of the array (the ']' in JSON)

      };
    } while (json_scan_next() == 0);  // parse next structure

    
    if (j_eng.s.error)  // we need to check why the loop ended.
                        // Did we get to the end of JSON, or came upon error.
    {
       signal_error_in_JSON()
    }


  Parts of JSON can be quickly skipped. If we are not interested
  in a particular key, we can just skip it with json_skip_key() call.
  Similarly json_skip_level() goes right to the end of an object
  or an array.
*/


/* These are JSON parser states that user can expect and handle.  */
enum json_states {
  JST_VALUE,       /* value found      */
  JST_KEY,         /* key found        */
  JST_OBJ_START,   /* object           */
  JST_OBJ_END,     /* object ended     */
  JST_ARRAY_START, /* array            */
  JST_ARRAY_END,   /* array ended      */
  NR_JSON_USER_STATES
};


enum json_value_types
{
  JSON_VALUE_UNINITIALIZED=0,
  JSON_VALUE_OBJECT=1,
  JSON_VALUE_ARRAY=2,
  JSON_VALUE_STRING=3,
  JSON_VALUE_NUMBER=4,
  JSON_VALUE_TRUE=5,
  JSON_VALUE_FALSE=6,
  JSON_VALUE_NULL=7
};


enum json_num_flags
{
  JSON_NUM_NEG=1,        /* Number is negative. */
  JSON_NUM_FRAC_PART=2,  /* The fractional part is not empty. */
  JSON_NUM_EXP=4,        /* The number has the 'e' part. */
};


typedef struct st_json_engine_t
{
  json_string_t s;  /* String to parse. */
  int sav_c_len;    /* Length of the current character.
                       Can be more than 1 for multibyte charsets */

  int state; /* The state of the parser. One of 'enum json_states'.
                It tells us what construction of JSON we've just read. */

  /* These values are only set after the json_read_value() call. */
  enum json_value_types value_type; /* type of the value.*/
  const uchar *value;      /* Points to the value. */
  const uchar *value_begin;/* Points to where the value starts in the JSON. */
  int value_escaped;       /* Flag telling if the string value has escaping.*/
  uint num_flags;  /* the details of the JSON_VALUE_NUMBER, is it negative,
                      or if it has the fractional part.
                      See the enum json_num_flags. */

  /*
    In most cases the 'value' and 'value_begin' are equal.
    They only differ if the value is a string constants. Then 'value_begin'
    points to the starting quotation mark, while the 'value' - to
    the first character of the string.
  */

  const uchar *value_end; /* Points to the next character after the value. */
  int value_len; /* The length of the value. Does not count quotations for */
                 /* string constants. */

  int stack[JSON_DEPTH_LIMIT]; /* Keeps the stack of nested JSON structures. */
  int stack_p;                 /* The 'stack' pointer. */
  volatile uchar *killed_ptr;
} json_engine_t;


int json_scan_start(json_engine_t *je,
                        CHARSET_INFO *i_cs, const uchar *str, const uchar *end);
int json_scan_next(json_engine_t *j);


/*
  json_read_keyname_chr() function assists parsing the name of an JSON key.
  It only can be called when the json_engine is in JST_KEY.
  The json_read_keyname_chr() reads one character of the name of the key,
  and puts it in j_eng.s.next_c.
  Typical usage is like this:

  if (j_eng.state == JST_KEY)
  {
    while (json_read_keyname_chr(&j) == 0)
    {
      //handle next character i.e. match it against the pattern
    }
  }
*/

int json_read_keyname_chr(json_engine_t *j);


/*
  Check if the name of the current JSON key matches
  the step of the path.
*/
int json_key_matches(json_engine_t *je, json_string_t *k);


/*
  json_read_value() function parses the JSON value syntax,
  so that we can handle the value of a key or an array item.
  It only returns meaningful result when the engine is in
  the JST_VALUE state.

  Typical usage is like this:

  if (j_eng.state ==  JST_VALUE)
  {
    json_read_value(&j_eng);
    switch(j_eng.value_type)
    {
      case JSON_VALUE_STRING:
        // get the string
        str= j_eng.value;
        str_length= j_eng.value_len;
      case JSON_VALUE_NUMBER:
        // get the number
      ... etc
    }
*/
int json_read_value(json_engine_t *j);


/*
  json_skip_key() makes parser skip the content of the current
  JSON key quickly.
  It can be called only when the json_engine state is JST_KEY.
  Typical usage is:

  if (j_eng.state == JST_KEY)
  {
    if (key_does_not_match(j_eng))
      json_skip_key(j_eng);
  }
*/

int json_skip_key(json_engine_t *j);


typedef const int *json_level_t;

/*
  json_skip_to_level() makes parser quickly get out of nested
  loops and arrays. It is used when we're not interested in what is
  there in the rest of these structures.
  The 'level' should be remembered in advance.
        json_level_t level= json_get_level(j);
        .... // getting into the nested JSON structures
        json_skip_to_level(j, level);
*/
#define json_get_level(j) (j->stack_p)

int json_skip_to_level(json_engine_t *j, int level);

/*
  json_skip_level() works as above with just current structure.
  So it gets to the end of the current JSON array or object.
*/
#define json_skip_level(json_engine) \
  json_skip_to_level((json_engine), (json_engine)->stack_p)


/*
  works as json_skip_level() but also counts items on the current
  level skipped.
*/
int json_skip_level_and_count(json_engine_t *j, int *n_items_skipped);

#define json_skip_array_item json_skip_key

/*
  Checks if the current value is of scalar type -
  not an OBJECT nor ARRAY.
*/
#define json_value_scalar(je)  ((je)->value_type > JSON_VALUE_ARRAY)


/*
  Look for the JSON PATH in the json string.
  Function can be called several times with same JSON/PATH to
  find multiple matches.
  On the first call, the json_engine_t parameter should be
  initialized with the JSON string, and the json_path_t with the JSON path
  appropriately. The 'p_cur_step' should point at the first
  step of the path.
  The 'array_counters' is the array of JSON_DEPTH_LIMIT size.
  It stores the array counters of the parsed JSON.
  If function returns 0, it means it found the match. The position of
  the match is je->s.c_str. Then we can call the json_find_path()
  with same engine/path/p_cur_step to get the next match.
  Non-zero return means no matches found.
  Check je->s.error to see if there was an error in JSON.
*/
int json_find_path(json_engine_t *je,
                   json_path_t *p, json_path_step_t **p_cur_step,
                   int *array_counters);


typedef struct st_json_find_paths_t
{
  uint n_paths;
  json_path_t *paths;
  uint cur_depth;
  uint *path_depths;
  int array_counters[JSON_DEPTH_LIMIT];
} json_find_paths_t;


int json_find_paths_first(json_engine_t *je, json_find_paths_t *state,
                          uint n_paths, json_path_t *paths, uint *path_depths);
int json_find_paths_next(json_engine_t *je, json_find_paths_t *state);


#define JSON_ERROR_OUT_OF_SPACE  (-1)
#define JSON_ERROR_ILLEGAL_SYMBOL (-2)

/*
  Convert JSON string constant into ordinary string constant
  which can involve unpacking json escapes and changing character set.
  Returns negative integer in the case of an error,
  the length of the result otherwise.
*/
int json_unescape(CHARSET_INFO *json_cs,
                  const uchar *json_str, const uchar *json_end,
                  CHARSET_INFO *res_cs,
                  uchar *res, uchar *res_end);

/*
  Convert a string constant into JSON string constant.
  This can involve appropriate escaping and changing the character set.
  Returns the length of the result on success,
  on error returns a negative error code.
  Some error codes:
    JSON_ERROR_OUT_OF_SPACE    Not enough space in the provided buffer
    JSON_ERROR_ILLEGAL_SYMBOL  Source symbol cannot be represented in JSON
*/
int json_escape(CHARSET_INFO *str_cs, const uchar *str, const uchar *str_end,
                CHARSET_INFO *json_cs, uchar *json, uchar *json_end);


/*
  Appends the ASCII string to the json with the charset conversion.
*/
int json_append_ascii(CHARSET_INFO *json_cs,
                      uchar *json, uchar *json_end,
                      const uchar *ascii, const uchar *ascii_end);


/*
  Scan the JSON and return paths met one-by-one.
     json_get_path_start(&p)
     while (json_get_path_next(&p))
     {
       handle_the_next_path();
     }
*/

int json_get_path_start(json_engine_t *je, CHARSET_INFO *i_cs,
                        const uchar *str, const uchar *end,
                        json_path_t *p);


int json_get_path_next(json_engine_t *je, json_path_t *p);

int json_path_compare(const json_path_t *a, const json_path_t *b,
                      enum json_value_types vt, const int* array_size_counter);

int json_valid(const char *js, size_t js_len, CHARSET_INFO *cs);

int json_locate_key(const char *js, const char *js_end,
                    const char *kname,
                    const char **key_start, const char **key_end,
                    int *comma_pos);

int json_normalize(DYNAMIC_STRING *result,
                   const char *s, size_t size, CHARSET_INFO *cs);

int json_skip_array_and_count(json_engine_t *j, int* n_item);

inline static int json_scan_ended(json_engine_t *j)
{
  return (j->state == JST_ARRAY_END && j->stack_p == 0);
}

#ifdef  __cplusplus
}
#endif

#endif /* JSON_LIB_INCLUDED */