/*
 * Copyright © 2021 Benjamin Otte
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library. If not, see <http://www.gnu.org/licenses/>.
 *
 * Authors: Benjamin Otte
 */

#include "config.h"

#include "gtkjsonparserprivate.h"

typedef struct _GtkJsonBlock GtkJsonBlock;

/* Kind of container a block on the parser's block stack represents. */
typedef enum {
  GTK_JSON_BLOCK_TOPLEVEL,
  GTK_JSON_BLOCK_OBJECT,
  GTK_JSON_BLOCK_ARRAY,
} GtkJsonBlockType;

/* One nesting level of the document currently being walked. */
struct _GtkJsonBlock
{
  GtkJsonBlockType type;
  const guchar *value; /* start of current value to be consumed by external code */
  const guchar *member_name; /* name of current value, only used for object types */
  gsize index; /* index of the current element */
};

/* Parser state: the input buffer, the first error, and the stack of open blocks. */
struct _GtkJsonParser
{
  GBytes *bytes;
  const guchar *reader; /* current read head, pointing as far as we've read */
  const guchar *start; /* pointer at start of data, after optional BOM */
  const guchar *end; /* pointer after end of data we're reading */

  GError *error; /* if an error has happened, it's stored here. Errors aren't recoverable. */
  const guchar *error_start; /* start of error location */
  const guchar *error_end; /* end of error location */

  GtkJsonBlock *block; /* current block */
  GtkJsonBlock *blocks; /* blocks array */
  GtkJsonBlock *blocks_end; /* one past the last element of the blocks array */
  GtkJsonBlock blocks_preallocated[128]; /* preallocated */
};

/*
 * Per-byte classification flags stored in the upper four bits of each
 * json_character_table entry. The lower four bits (see
 * JSON_CHARACTER_NODE_MASK) hold the GtkJsonNode value that a byte
 * starts, if any.
 */
typedef enum {
  WHITESPACE     = (1 << 4),
  NEWLINE        = (1 << 5),
  STRING_ELEMENT = (1 << 6),
  STRING_MARKER  = (1 << 7),
} JsonCharacterType;

#define JSON_CHARACTER_NODE_MASK ((1 << 4) - 1)

/*
 * Lookup table indexed by byte value. Entries not listed here are 0:
 * that covers control characters other than tab/CR/LF and all bytes
 * above 127. '"' and '\\' are the only STRING_MARKER bytes and are
 * deliberately not STRING_ELEMENT.
 */
static const guchar json_character_table[256] = {
  ['\t'] = WHITESPACE,
  ['\r'] = WHITESPACE | NEWLINE,
  ['\n'] = WHITESPACE | NEWLINE,
  [' '] = WHITESPACE | STRING_ELEMENT,
  ['!'] = STRING_ELEMENT,
  ['"'] = GTK_JSON_STRING | STRING_MARKER,
  ['#'] = STRING_ELEMENT,
  ['$'] = STRING_ELEMENT,
  ['%'] = STRING_ELEMENT,
  ['&'] = STRING_ELEMENT,
  ['\''] = STRING_ELEMENT,
  ['('] = STRING_ELEMENT,
  [')'] = STRING_ELEMENT,
  ['*'] = STRING_ELEMENT,
  ['+'] = STRING_ELEMENT,
  [','] = STRING_ELEMENT,
  ['-'] = GTK_JSON_NUMBER | STRING_ELEMENT,
  ['.'] = STRING_ELEMENT,
  ['/'] = STRING_ELEMENT,
  ['0'] = GTK_JSON_NUMBER | STRING_ELEMENT,
  ['1'] = GTK_JSON_NUMBER | STRING_ELEMENT,
  ['2'] = GTK_JSON_NUMBER | STRING_ELEMENT,
  ['3'] = GTK_JSON_NUMBER | STRING_ELEMENT,
  ['4'] = GTK_JSON_NUMBER | STRING_ELEMENT,
  ['5'] = GTK_JSON_NUMBER | STRING_ELEMENT,
  ['6'] = GTK_JSON_NUMBER | STRING_ELEMENT,
  ['7'] = GTK_JSON_NUMBER | STRING_ELEMENT,
  ['8'] = GTK_JSON_NUMBER | STRING_ELEMENT,
  ['9'] = GTK_JSON_NUMBER | STRING_ELEMENT,
  [':'] = STRING_ELEMENT,
  [';'] = STRING_ELEMENT,
  ['<'] = STRING_ELEMENT,
  ['='] = STRING_ELEMENT,
  ['>'] = STRING_ELEMENT,
  ['?'] = STRING_ELEMENT,
  ['@'] = STRING_ELEMENT,
  ['A'] = STRING_ELEMENT,
  ['B'] = STRING_ELEMENT,
  ['C'] = STRING_ELEMENT,
  ['D'] = STRING_ELEMENT,
  ['E'] = STRING_ELEMENT,
  ['F'] = STRING_ELEMENT,
  ['G'] = STRING_ELEMENT,
  ['H'] = STRING_ELEMENT,
  ['I'] = STRING_ELEMENT,
  ['J'] = STRING_ELEMENT,
  ['K'] = STRING_ELEMENT,
  ['L'] = STRING_ELEMENT,
  ['M'] = STRING_ELEMENT,
  ['N'] = STRING_ELEMENT,
  ['O'] = STRING_ELEMENT,
  ['P'] = STRING_ELEMENT,
  ['Q'] = STRING_ELEMENT,
  ['R'] = STRING_ELEMENT,
  ['S'] = STRING_ELEMENT,
  ['T'] = STRING_ELEMENT,
  ['U'] = STRING_ELEMENT,
  ['V'] = STRING_ELEMENT,
  ['W'] = STRING_ELEMENT,
  ['X'] = STRING_ELEMENT,
  ['Y'] = STRING_ELEMENT,
  ['Z'] = STRING_ELEMENT,
  ['['] = GTK_JSON_ARRAY | STRING_ELEMENT,
  ['\\'] = STRING_MARKER,
  [']'] = STRING_ELEMENT,
  ['^'] = STRING_ELEMENT,
  ['_'] = STRING_ELEMENT,
  ['`'] = STRING_ELEMENT,
  ['a'] = STRING_ELEMENT,
  ['b'] = STRING_ELEMENT,
  ['c'] = STRING_ELEMENT,
  ['d'] = STRING_ELEMENT,
  ['e'] = STRING_ELEMENT,
  ['f'] = GTK_JSON_BOOLEAN | STRING_ELEMENT,
  ['g'] = STRING_ELEMENT,
  ['h'] = STRING_ELEMENT,
  ['i'] = STRING_ELEMENT,
  ['j'] = STRING_ELEMENT,
  ['k'] = STRING_ELEMENT,
  ['l'] = STRING_ELEMENT,
  ['m'] = STRING_ELEMENT,
  ['n'] = GTK_JSON_NULL | STRING_ELEMENT,
  ['o'] = STRING_ELEMENT,
  ['p'] = STRING_ELEMENT,
  ['q'] = STRING_ELEMENT,
  ['r'] = STRING_ELEMENT,
  ['s'] = STRING_ELEMENT,
  ['t'] = GTK_JSON_BOOLEAN | STRING_ELEMENT,
  ['u'] = STRING_ELEMENT,
  ['v'] = STRING_ELEMENT,
  ['w'] = STRING_ELEMENT,
  ['x'] = STRING_ELEMENT,
  ['y'] = STRING_ELEMENT,
  ['z'] = STRING_ELEMENT,
  ['{'] = GTK_JSON_OBJECT | STRING_ELEMENT,
  ['|'] = STRING_ELEMENT,
  ['}'] = STRING_ELEMENT,
  ['~'] = STRING_ELEMENT,
  [127] = STRING_ELEMENT,
};

/*
 * Returns the first position in [start, end) whose table entry has
 * none of the bits in @type set, or @end if every byte matches.
 */
static const guchar *
json_skip_characters (const guchar      *start,
                      const guchar      *end,
                      JsonCharacterType  type)
{
  const guchar *s;

  for (s = start; s < end; s++)
    {
      if (!(json_character_table[*s] & type))
        break;
    }
  return s;
}

/*
 * Returns the first position in [start, end) whose table entry has at
 * least one of the bits in @type set, or @end if there is none.
 */
static const guchar *
json_skip_characters_until (const guchar      *start,
                            const guchar      *end,
                            JsonCharacterType  type)
{
  const guchar *s;

  for (s = start; s < end; s++)
    {
      if (json_character_table[*s] & type)
        break;
    }
  return s;
}

/*
 * Like json_skip_characters_until() but without an end bound: the scan
 * only stops at a byte matching @type, so the caller must guarantee
 * that such a byte exists at or after @start.
 */
static const guchar *
json_find_character (const guchar      *start,
                     JsonCharacterType  type)
{
  const guchar *s;

  for (s = start; ; s++)
    {
      if ((json_character_table[*s] & type))
        break;
    }
  return s;
}

/* Returns the error domain quark used for all errors created in this file. */
GQuark
gtk_json_error_quark (void)
{
  return g_quark_from_static_string ("gtk-json-error-quark");
}

static void
gtk_json_parser_take_error (GtkJsonParser *self, const guchar *start_location, const guchar *end_location, GError *error) { g_assert (start_location <= end_location); g_assert (self->start <= start_location); g_assert (end_location <= self->end); if (self->error) { g_error_free (error); return; } self->error = error; self->error_start = start_location; self->error_end = end_location; } static void gtk_json_parser_syntax_error_at (GtkJsonParser *self, const guchar *error_start, const guchar *error_end, const char *format, ...) G_GNUC_PRINTF(4, 5); static void gtk_json_parser_syntax_error_at (GtkJsonParser *self, const guchar *error_start, const guchar *error_end, const char *format, ...) { va_list args; if (self->error) return; va_start (args, format); gtk_json_parser_take_error (self, error_start, error_end, g_error_new_valist (GTK_JSON_ERROR, GTK_JSON_ERROR_SYNTAX, format, args)); va_end (args); } static void gtk_json_parser_syntax_error (GtkJsonParser *self, const char *format, ...) G_GNUC_PRINTF(2, 3); static void gtk_json_parser_syntax_error (GtkJsonParser *self, const char *format, ...) { va_list args; const guchar *error_end; if (self->error) return; va_start (args, format); for (error_end = self->reader; error_end < self->end && g_ascii_isalnum (*error_end); error_end++) ; if (error_end == self->reader && g_utf8_get_char_validated ((const char *) error_end, self->end - error_end) < (gunichar) -2) { error_end = (const guchar *) g_utf8_next_char (error_end); } gtk_json_parser_take_error (self, self->reader, error_end, g_error_new_valist (GTK_JSON_ERROR, GTK_JSON_ERROR_SYNTAX, format, args)); va_end (args); } static void gtk_json_parser_type_error (GtkJsonParser *self, const char *format, ...) G_GNUC_PRINTF(2, 3); static void gtk_json_parser_type_error (GtkJsonParser *self, const char *format, ...) 
{ const guchar *start_location; va_list args; if (self->error) return; if (self->block->value) start_location = self->block->value; else if (self->block != self->blocks) start_location = self->block[-1].value; else start_location = self->start; va_start (args, format); gtk_json_parser_take_error (self, start_location, self->reader, g_error_new_valist (GTK_JSON_ERROR, GTK_JSON_ERROR_TYPE, format, args)); va_end (args); } void gtk_json_parser_value_error (GtkJsonParser *self, const char *format, ...) { const guchar *start_location; va_list args; if (self->error) return; if (self->block->value) start_location = self->block->value; else if (self->block != self->blocks) start_location = self->block[-1].value; else start_location = self->start; va_start (args, format); gtk_json_parser_take_error (self, start_location, self->reader, g_error_new_valist (GTK_JSON_ERROR, GTK_JSON_ERROR_VALUE, format, args)); va_end (args); } void gtk_json_parser_schema_error (GtkJsonParser *self, const char *format, ...) 
{ const guchar *start_location; va_list args; if (self->error) return; if (self->block->member_name) start_location = self->block->member_name; if (self->block->value) start_location = self->block->value; else if (self->block != self->blocks) start_location = self->block[-1].value; else start_location = self->start; va_start (args, format); gtk_json_parser_take_error (self, start_location, self->reader, g_error_new_valist (GTK_JSON_ERROR, GTK_JSON_ERROR_SCHEMA, format, args)); va_end (args); } static gboolean gtk_json_parser_is_eof (GtkJsonParser *self) { return self->reader >= self->end; } static gsize gtk_json_parser_remaining (GtkJsonParser *self) { g_return_val_if_fail (self->reader <= self->end, 0); return self->end - self->reader; } static void gtk_json_parser_skip_bom (GtkJsonParser *self) { if (gtk_json_parser_remaining (self) < 3) return; if (self->reader[0] == 0xEF && self->reader[1] == 0xBB && self->reader[2] == 0xBF) self->reader += 3; } static void gtk_json_parser_skip_whitespace (GtkJsonParser *self) { self->reader = json_skip_characters (self->reader, self->end, WHITESPACE); } static gboolean gtk_json_parser_has_char (GtkJsonParser *self, char c) { return gtk_json_parser_remaining (self) && *self->reader == c; } static gboolean gtk_json_parser_try_char (GtkJsonParser *self, char c) { if (!gtk_json_parser_has_char (self, c)) return FALSE; self->reader++; return TRUE; } static gboolean gtk_json_parser_try_identifier_len (GtkJsonParser *self, const char *ident, gsize len) { if (gtk_json_parser_remaining (self) < len) return FALSE; if (memcmp (self->reader, ident, len) != 0) return FALSE; self->reader += len; return TRUE; } #define gtk_json_parser_try_identifier(parser, ident) gtk_json_parser_try_identifier_len(parser, ident, strlen(ident)) /* * decode_utf16_surrogate_pair: * @first: the first UTF-16 code point * @second: the second UTF-16 code point * * Decodes a surrogate pair of UTF-16 code points into the equivalent * Unicode code point. 
* * If the code points are not valid, 0 is returned. * * Returns: the Unicode code point equivalent to the surrogate pair */ static inline gunichar decode_utf16_surrogate_pair (gunichar first, gunichar second) { if (0xd800 > first || first > 0xdbff || 0xdc00 > second || second > 0xdfff) return 0; return 0x10000 | (first & 0x3ff) << 10 | (second & 0x3ff); } static gsize gtk_json_unescape_char (const guchar *json_escape, char out_data[6], gsize *out_len) { switch (json_escape[1]) { case '"': case '\\': case '/': out_data[0] = json_escape[1]; *out_len = 1; return 2; case 'b': out_data[0] = '\b'; *out_len = 1; return 2; case 'f': out_data[0] = '\f'; *out_len = 1; return 2; case 'n': out_data[0] = '\n'; *out_len = 1; return 2; case 'r': out_data[0] = '\r'; *out_len = 1; return 2; case 't': out_data[0] = '\t'; *out_len = 1; return 2; case 'u': { gunichar unichar = (g_ascii_xdigit_value (json_escape[2]) << 12) | (g_ascii_xdigit_value (json_escape[3]) << 8) | (g_ascii_xdigit_value (json_escape[4]) << 4) | (g_ascii_xdigit_value (json_escape[5])); gsize result = 6; /* resolve UTF-16 surrogates for Unicode characters not in the BMP, * as per ECMA 404, § 9, "String" */ if (g_unichar_type (unichar) == G_UNICODE_SURROGATE) { unichar = decode_utf16_surrogate_pair (unichar, (g_ascii_xdigit_value (json_escape[8]) << 12) | (g_ascii_xdigit_value (json_escape[9]) << 8) | (g_ascii_xdigit_value (json_escape[10]) << 4) | (g_ascii_xdigit_value (json_escape[11]))); result += 6; } *out_len = g_unichar_to_utf8 (unichar, out_data); return result; } default: g_assert_not_reached (); return 0; } } typedef struct _JsonStringIter JsonStringIter; struct _JsonStringIter { char buf[6]; const guchar *s; const guchar *next; }; static gsize json_string_iter_next (JsonStringIter *iter) { gsize len; iter->s = iter->next; iter->next = json_find_character (iter->s, STRING_MARKER); if (iter->next != iter->s) return iter->next - iter->s; if (*iter->next == '"') return 0; iter->next += gtk_json_unescape_char 
(iter->next, iter->buf, &len); iter->s = (const guchar *) iter->buf; return len; } /* The escaped string MUST be valid json, so it must begin * with " and end with " and must not contain any invalid * escape codes. * This function is meant to be fast */ static gsize json_string_iter_init (JsonStringIter *iter, const guchar *string) { g_assert (*string == '"'); iter->next = string + 1; return json_string_iter_next (iter); } static gboolean json_string_iter_has_next (JsonStringIter *iter) { return *iter->next != '"'; } static const char * json_string_iter_get (JsonStringIter *iter) { return (const char *) iter->s; } /* The escaped string MUST be valid json, so it must begin * with " and end with " and must not contain any invalid * escape codes. * This function is meant to be fast */ static char * gtk_json_unescape_string (const guchar *escaped) { JsonStringIter iter; GString *string; gsize len; len = json_string_iter_init (&iter, escaped); string = NULL; if (!json_string_iter_has_next (&iter)) return g_strndup (json_string_iter_get (&iter), len); string = g_string_new (NULL); do { g_string_append_len (string, json_string_iter_get (&iter), len); } while ((len = json_string_iter_next (&iter))); return g_string_free (string, FALSE); } static gboolean gtk_json_parser_parse_string (GtkJsonParser *self) { const guchar *start; start = self->reader; if (!gtk_json_parser_try_char (self, '"')) { gtk_json_parser_type_error (self, "Not a string"); return FALSE; } self->reader = json_skip_characters (self->reader, self->end, STRING_ELEMENT); while (gtk_json_parser_remaining (self)) { if (*self->reader < 0x20) { if (*self->reader == '\r' || *self->reader == '\n') gtk_json_parser_syntax_error (self, "Newlines in strings are not allowed"); else if (*self->reader == '\t') gtk_json_parser_syntax_error (self, "Tabs not allowed in strings"); else gtk_json_parser_syntax_error (self, "Disallowed control character in string literal"); return FALSE; } else if (*self->reader > 127) { 
gunichar c = g_utf8_get_char_validated ((const char *) self->reader, gtk_json_parser_remaining (self)); if (c == (gunichar) -2 || c == (gunichar) -1) { gtk_json_parser_syntax_error (self, "Invalid UTF-8"); return FALSE; } self->reader = (const guchar *) g_utf8_next_char ((const char *) self->reader); } else if (*self->reader == '"') { self->reader++; return TRUE; } else if (*self->reader == '\\') { if (gtk_json_parser_remaining (self) < 2) { self->reader = self->end; goto end; } switch (self->reader[1]) { case '"': case '\\': case '/': case 'b': case 'f': case 'n': case 'r': case 't': break; case 'u': /* lots of work necessary to validate the unicode escapes here */ if (gtk_json_parser_remaining (self) < 6 || !g_ascii_isxdigit (self->reader[2]) || !g_ascii_isxdigit (self->reader[3]) || !g_ascii_isxdigit (self->reader[4]) || !g_ascii_isxdigit (self->reader[5])) { const guchar *end; for (end = self->reader + 2; end < self->reader + 6 && end < self->end; end++) { if (!g_ascii_isxdigit (*end)) break; } gtk_json_parser_syntax_error_at (self, self->reader, end, "Invalid Unicode escape sequence"); return FALSE; } else { gsize escape_size = 6; gunichar unichar = (g_ascii_xdigit_value (self->reader[2]) << 12) | (g_ascii_xdigit_value (self->reader[3]) << 8) | (g_ascii_xdigit_value (self->reader[4]) << 4) | (g_ascii_xdigit_value (self->reader[5])); /* resolve UTF-16 surrogates for Unicode characters not in the BMP, * as per ECMA 404, § 9, "String" */ if (g_unichar_type (unichar) == G_UNICODE_SURROGATE) { if (gtk_json_parser_remaining (self) >= 12 && self->reader[6] == '\\' && self->reader[7] == 'u' && g_ascii_isxdigit (self->reader[8]) && g_ascii_isxdigit (self->reader[9]) && g_ascii_isxdigit (self->reader[10]) && g_ascii_isxdigit (self->reader[11])) { unichar = decode_utf16_surrogate_pair (unichar, (g_ascii_xdigit_value (self->reader[8]) << 12) | (g_ascii_xdigit_value (self->reader[9]) << 8) | (g_ascii_xdigit_value (self->reader[10]) << 4) | (g_ascii_xdigit_value 
(self->reader[11]))); escape_size += 6; } else { unichar = 0; } if (unichar == 0) { gtk_json_parser_syntax_error_at (self, self->reader, self->reader + escape_size, "Invalid UTF-16 surrogate pair"); return FALSE; } self->reader += escape_size - 2; } } break; default: if (g_utf8_get_char_validated ((const char *) self->reader + 1, self->end - self->reader - 1) < (gunichar) -2) gtk_json_parser_syntax_error_at (self, self->reader, (const guchar *) g_utf8_next_char (self->reader + 1), "Unknown escape sequence"); else gtk_json_parser_syntax_error_at (self, self->reader, self->reader + 1, "Unknown escape sequence"); return FALSE; } self->reader += 2; } self->reader = json_skip_characters (self->reader, self->end, STRING_ELEMENT); } end: gtk_json_parser_syntax_error_at (self, start, self->reader, "Unterminated string literal"); return FALSE; } static gboolean gtk_json_parser_parse_number (GtkJsonParser *self) { const guchar *start = self->reader; gboolean have_sign; /* sign */ have_sign = gtk_json_parser_try_char (self, '-'); /* integer part */ if (gtk_json_parser_try_char (self, '0')) { /* Technically, "01" in the JSON grammar would be 2 numbers: * "0" followed by "1". * Practically, nobody understands that it's 2 numbers, so we * special-purpose an error message for it, because 2 numbers * can never follow each other. 
*/ if (!gtk_json_parser_is_eof (self) && g_ascii_isdigit (*self->reader)) { do { self->reader++; } while (!gtk_json_parser_is_eof (self) && g_ascii_isdigit (*self->reader)); gtk_json_parser_syntax_error_at (self, start, self->reader, "Numbers may not start with leading 0s"); return FALSE; } } else { if (gtk_json_parser_is_eof (self) || !g_ascii_isdigit (*self->reader)) { if (have_sign) gtk_json_parser_syntax_error_at (self, start, self->reader, "Expected a number after '-' character"); else gtk_json_parser_type_error (self, "Not a number"); return FALSE; } self->reader++; while (!gtk_json_parser_is_eof (self) && g_ascii_isdigit (*self->reader)) self->reader++; } /* fractional part */ if (gtk_json_parser_try_char (self, '.')) { if (!g_ascii_isdigit (*self->reader)) { gtk_json_parser_syntax_error_at (self, start, self->reader, "Expected a digit after '.'"); return FALSE; } do { self->reader++; } while (!gtk_json_parser_is_eof (self) && g_ascii_isdigit (*self->reader)); } /* exponent */ if (gtk_json_parser_try_char (self, 'e') || gtk_json_parser_try_char (self, 'E')) { if (!gtk_json_parser_try_char (self, '-')) gtk_json_parser_try_char (self, '+'); if (!g_ascii_isdigit (*self->reader)) { gtk_json_parser_syntax_error_at (self, start, self->reader, "Expected a digit in exponent"); return FALSE; } do { self->reader++; } while (!gtk_json_parser_is_eof (self) && g_ascii_isdigit (*self->reader)); } return TRUE; } static gboolean gtk_json_parser_parse_value (GtkJsonParser *self) { if (gtk_json_parser_is_eof (self)) { gtk_json_parser_syntax_error (self, "Unexpected end of document"); return FALSE; } switch (json_character_table[*self->block->value] & JSON_CHARACTER_NODE_MASK) { case GTK_JSON_STRING: return gtk_json_parser_parse_string (self); case GTK_JSON_NUMBER: return gtk_json_parser_parse_number (self); case GTK_JSON_NULL: if (gtk_json_parser_try_identifier (self, "null")) return TRUE; break; case GTK_JSON_BOOLEAN: if (gtk_json_parser_try_identifier (self, "true") || 
gtk_json_parser_try_identifier (self, "false")) return TRUE; break; case GTK_JSON_OBJECT: case GTK_JSON_ARRAY: /* don't preparse objects */ return TRUE; default: break; } if (gtk_json_parser_remaining (self) >= 2 && (self->block->value[0] == '.' || self->block->value[0] == '+') && g_ascii_isdigit (self->block->value[1])) { const guchar *end = self->block->value + 2; while (end < self->end && g_ascii_isalnum (*end)) end++; gtk_json_parser_syntax_error_at (self, self->block->value, end, "Numbers may not start with '%c'", *self->block->value); } else if (*self->reader == 0) gtk_json_parser_syntax_error (self, "Unexpected nul byte in document"); else gtk_json_parser_syntax_error (self, "Expected a value"); return FALSE; } static void gtk_json_parser_push_block (GtkJsonParser *self, GtkJsonBlockType type) { self->block++; if (self->block == self->blocks_end) { gsize old_size = self->blocks_end - self->blocks; gsize new_size = old_size + 128; if (self->blocks == self->blocks_preallocated) { self->blocks = g_new (GtkJsonBlock, new_size); memcpy (self->blocks, self->blocks_preallocated, sizeof (GtkJsonBlock) * G_N_ELEMENTS (self->blocks_preallocated)); } else { self->blocks = g_renew (GtkJsonBlock, self->blocks, new_size); } self->blocks_end = self->blocks + new_size; self->block = self->blocks + old_size; } self->block->type = type; self->block->member_name = 0; self->block->value = 0; self->block->index = 0; } static void gtk_json_parser_pop_block (GtkJsonParser *self) { g_assert (self->block > self->blocks); self->block--; } GtkJsonParser * gtk_json_parser_new_for_string (const char *string, gssize size) { GtkJsonParser *self; GBytes *bytes; bytes = g_bytes_new (string, size >= 0 ? 
size : strlen (string));

  self = gtk_json_parser_new_for_bytes (bytes);

  /* the parser holds its own reference */
  g_bytes_unref (bytes);

  return self;
}

/*
 * Creates a parser reading from @bytes and takes a reference on them.
 * A leading UTF-8 byte order mark is skipped, and the first value of
 * the document is validated right away via gtk_json_parser_rewind(),
 * so the returned parser may already carry an error.
 */
GtkJsonParser *
gtk_json_parser_new_for_bytes (GBytes *bytes)
{
  GtkJsonParser *self;
  gsize size;

  g_return_val_if_fail (bytes != NULL, NULL);

  self = g_slice_new0 (GtkJsonParser);

  self->bytes = g_bytes_ref (bytes);
  self->reader = g_bytes_get_data (bytes, &size);
  self->end = self->reader + size;

  self->blocks = self->blocks_preallocated;
  self->blocks_end = self->blocks + G_N_ELEMENTS (self->blocks_preallocated);
  self->block = self->blocks;
  self->block->type = GTK_JSON_BLOCK_TOPLEVEL;

  gtk_json_parser_skip_bom (self);
  self->start = self->reader;
  gtk_json_parser_rewind (self);

  return self;
}

/* Frees the parser, its stored error and its data reference. NULL is allowed. */
void
gtk_json_parser_free (GtkJsonParser *self)
{
  if (self == NULL)
    return;

  g_bytes_unref (self->bytes);

  /* the block stack is only heap memory once it outgrew the preallocated array */
  if (self->blocks != self->blocks_preallocated)
    g_free (self->blocks);

  if (self->error)
    g_error_free (self->error);

  g_slice_free (GtkJsonParser, self);
}

/*
 * If the read head still sits at the start of the current value, walks
 * over that value - entering and leaving any object or array it opens -
 * until the parser is back at the depth it started from.
 *
 * Returns: FALSE if entering or ending a block failed, TRUE otherwise
 */
static gboolean
gtk_json_parser_skip_block (GtkJsonParser *self)
{
  gsize depth;

  /* the read head already moved past the start of the value */
  if (self->reader != self->block->value)
    return TRUE;

  depth = gtk_json_parser_get_depth (self);
  while (TRUE)
    {
      if (*self->reader == '{')
        {
          if (!gtk_json_parser_start_object (self))
            return FALSE;
        }
      else if (*self->reader == '[')
        {
          if (!gtk_json_parser_start_array (self))
            return FALSE;
        }

      while (self->reader != self->block->value)
        {
          /* This should never be reentrant to this function or we might
           * loop causing stack overflow */
          if (!gtk_json_parser_next (self))
            {
              if (!gtk_json_parser_end (self))
                return FALSE;
              if (depth >= gtk_json_parser_get_depth (self))
                return TRUE;
            }
        }
    }

  return TRUE;
}

/*
 * Moves to the next value in the current block, skipping whatever is
 * left of the current one.
 *
 * Returns: TRUE if the parser now points at a validated value; FALSE if
 *   the block has no further values or an error is set. In the toplevel
 *   block this always returns FALSE.
 */
gboolean
gtk_json_parser_next (GtkJsonParser *self)
{
  if (self->error)
    return FALSE;

  /* a NULL value means there is no current value to advance from */
  if (self->block->value == NULL)
    return FALSE;

  if (!gtk_json_parser_skip_block (self))
    {
      g_assert (self->error);
      return FALSE;
    }

  switch (self->block->type)
    {
    case GTK_JSON_BLOCK_TOPLEVEL:
      /* Only whitespace may follow the single toplevel value. */
      gtk_json_parser_skip_whitespace (self);
      if (gtk_json_parser_is_eof (self))
        {
          self->block->value = NULL;
        }
      else if (*self->reader == 0)
        {
          gtk_json_parser_syntax_error (self, "Unexpected nul byte in document");
        }
      else
        {
          gtk_json_parser_syntax_error_at (self, self->reader, self->end, "Data at end of document");
        }
      return FALSE;

    case GTK_JSON_BLOCK_OBJECT:
      gtk_json_parser_skip_whitespace (self);
      if (gtk_json_parser_is_eof (self))
        {
          gtk_json_parser_syntax_error_at (self, self->block[-1].value, self->reader, "Unterminated object");
          self->block->member_name = NULL;
          self->block->value = NULL;
          /* no return here: at EOF both checks below fail and the ','
           * branch returns FALSE, keeping the error recorded above
           */
        }
      if (gtk_json_parser_has_char (self, '}'))
        {
          /* end of object; the '}' is left for gtk_json_parser_end() */
          self->block->member_name = NULL;
          self->block->value = NULL;
          return FALSE;
        }
      if (!gtk_json_parser_try_char (self, ','))
        {
          gtk_json_parser_syntax_error (self, "Expected a ',' to separate object members");
          return FALSE;
        }
      gtk_json_parser_skip_whitespace (self);
      if (!gtk_json_parser_has_char (self, '"'))
        {
          gtk_json_parser_syntax_error (self, "Expected a string for object member name");
          return FALSE;
        }
      self->block->member_name = self->reader;

      if (!gtk_json_parser_parse_string (self))
        return FALSE;
      gtk_json_parser_skip_whitespace (self);
      if (!gtk_json_parser_try_char (self, ':'))
        {
          gtk_json_parser_syntax_error (self, "Missing ':' after member name");
          return FALSE;
        }

      gtk_json_parser_skip_whitespace (self);
      self->block->value = self->reader;
      if (!gtk_json_parser_parse_value (self))
        return FALSE;
      break;

    case GTK_JSON_BLOCK_ARRAY:
      gtk_json_parser_skip_whitespace (self);
      if (gtk_json_parser_is_eof (self))
        {
          gtk_json_parser_syntax_error_at (self, self->block[-1].value, self->reader, "Unterminated array");
          self->block->member_name = NULL;
          self->block->value = NULL;
          /* falls through to the ',' check below, which returns FALSE */
        }
      if (gtk_json_parser_has_char (self, ']'))
        {
          /* end of array; the ']' is left for gtk_json_parser_end() */
          self->block->value = NULL;
          return FALSE;
        }

      if (!gtk_json_parser_try_char (self, ','))
        {
          gtk_json_parser_syntax_error (self, "Expected a ',' to separate array members");
          return FALSE;
        }

      gtk_json_parser_skip_whitespace (self);
      self->block->value = self->reader;
      if (!gtk_json_parser_parse_value (self))
        return FALSE;
      break;

    default:
      g_assert_not_reached ();
      break;
    }

  return TRUE;
}

/*
 * Moves back to the first value of the current block. For objects and
 * arrays this pops the block and enters it again from its opening
 * bracket; for the toplevel block it re-validates the document's
 * value from the start. Does nothing if an error is set.
 */
void
gtk_json_parser_rewind (GtkJsonParser *self)
{
  if (self->error)
    return;

  switch (self->block->type)
    {
    case GTK_JSON_BLOCK_OBJECT:
      gtk_json_parser_pop_block (self);
      self->reader = self->block->value;
      gtk_json_parser_start_object (self);
      break;

    case GTK_JSON_BLOCK_ARRAY:
      gtk_json_parser_pop_block (self);
      self->reader = self->block->value;
      gtk_json_parser_start_array (self);
      break;

    case GTK_JSON_BLOCK_TOPLEVEL:
      self->reader = self->start;
      gtk_json_parser_skip_whitespace (self);
      if (gtk_json_parser_is_eof (self))
        {
          gtk_json_parser_syntax_error_at (self, self->start, self->reader, "Empty document");
        }
      else
        {
          self->block->value = self->reader;
          gtk_json_parser_parse_value (self);
        }
      break;

    default:
      g_assert_not_reached ();
      return;
    }
}

/* Returns the number of blocks open above the toplevel block (0 at toplevel). */
gsize
gtk_json_parser_get_depth (GtkJsonParser *self)
{
  return self->block - self->blocks;
}

/*
 * Returns the node type of the current value, derived from its first
 * byte, or GTK_JSON_NONE if an error is set or there is no current value.
 */
GtkJsonNode
gtk_json_parser_get_node (GtkJsonParser *self)
{
  if (self->error)
    return GTK_JSON_NONE;

  if (self->block->value == NULL)
    return GTK_JSON_NONE;

  return (json_character_table[*self->block->value] & JSON_CHARACTER_NODE_MASK);
}

/* Returns the stored error, or NULL if none has happened. The parser keeps ownership. */
const GError *
gtk_json_parser_get_error (GtkJsonParser *self)
{
  return self->error;
}

/*
 * Stores the byte offsets of the error range in @start and @end,
 * measured from the start of the parser's data as returned by
 * g_bytes_get_data(). Both are set to 0 if there is no error.
 * Either out pointer may be NULL.
 */
void
gtk_json_parser_get_error_offset (GtkJsonParser *self,
                                  gsize         *start,
                                  gsize         *end)
{
  const guchar *data;

  if (self->error == NULL)
    {
      if (start)
        *start = 0;
      if (end)
        *end = 0;
      return;
    }

  data = g_bytes_get_data (self->bytes, NULL);
  if (start)
    *start = self->error_start - data;
  if (end)
    *end = self->error_end - data;
}

/*
 * Stores the error range as 0-based line numbers plus byte offsets
 * within those lines. Lines are counted from self->start. All outputs
 * are set to 0 if there is no error; any out pointer may be NULL.
 */
void
gtk_json_parser_get_error_location (GtkJsonParser *self,
                                    gsize         *start_line,
                                    gsize         *start_line_bytes,
                                    gsize         *end_line,
                                    gsize         *end_line_bytes)
{
  const guchar *s, *line_start;
  gsize lines;

  if (self->error == NULL)
    {
      if (start_line)
        *start_line = 0;
      if (start_line_bytes)
        *start_line_bytes = 0;
      if (end_line)
        *end_line = 0;
      if (end_line_bytes)
        *end_line_bytes = 0;
      return;
    }

  line_start = self->start;
  lines = 0;

  /* count the newlines before the start of the error */
  for (s = json_skip_characters_until (line_start,
self->error_start, NEWLINE); s < self->error_start; s = json_skip_characters_until (line_start, self->error_start, NEWLINE)) { if (s[0] == '\r' && s + 1 < self->error_start && s[1] == '\n') s++; lines++; line_start = s + 1; } if (start_line) *start_line = lines; if (start_line_bytes) *start_line_bytes = s - line_start; if (end_line == NULL && end_line_bytes == NULL) return; for (s = json_skip_characters_until (s, self->error_end, NEWLINE); s < self->error_end; s = json_skip_characters_until (line_start, self->error_end, NEWLINE)) { if (s[0] == '\r' && s + 1 < self->error_start && s[1] == '\n') s++; lines++; line_start = s + 1; } if (end_line) *end_line = lines; if (end_line_bytes) *end_line_bytes = s - line_start; } static gboolean gtk_json_parser_supports_member (GtkJsonParser *self) { if (self->error) return FALSE; if (self->block->type != GTK_JSON_BLOCK_OBJECT) return FALSE; if (self->block->member_name == NULL) return FALSE; return TRUE; } char * gtk_json_parser_get_member_name (GtkJsonParser *self) { if (!gtk_json_parser_supports_member (self)) return NULL; return gtk_json_unescape_string (self->block->member_name); } gboolean gtk_json_parser_has_member (GtkJsonParser *self, const char *name) { JsonStringIter iter; gsize found, len; if (!gtk_json_parser_supports_member (self)) return FALSE; found = 0; for (len = json_string_iter_init (&iter, self->block->member_name); len > 0; len = json_string_iter_next (&iter)) { const char *s = json_string_iter_get (&iter); if (strncmp (name + found, s, len) != 0) return FALSE; found += len; } return TRUE; } gboolean gtk_json_parser_find_member (GtkJsonParser *self, const char *name) { if (!gtk_json_parser_supports_member (self)) { while (gtk_json_parser_next (self)); return FALSE; } gtk_json_parser_rewind (self); do { if (gtk_json_parser_has_member (self, name)) return TRUE; } while (gtk_json_parser_next (self)); return FALSE; } static gssize json_string_iter_run_select (const guchar *string_data, const char * const 
*options) { JsonStringIter iter; gssize i, j; gsize found, len; if (options == NULL || options[0] == NULL) return -1; found = 0; i = 0; for (len = json_string_iter_init (&iter, string_data); len > 0; len = json_string_iter_next (&iter)) { const char *s = json_string_iter_get (&iter); if (strncmp (options[i] + found, s, len) != 0) { for (j = i + 1; options[j]; j++) { if (strncmp (options[j], options[i], found) == 0 && strncmp (options[j] + found, s, len) == 0) { i = j; break; } } if (j != i) return -1; } found += len; } if (options[i][found] == 0) return i; for (j = i + 1; options[j]; i++) { if (strncmp (options[j], options[i], found) != 0) continue; if (options[j][found] == 0) return j; } return -1; } gssize gtk_json_parser_select_member (GtkJsonParser *self, const char * const *options) { if (!gtk_json_parser_supports_member (self)) return -1; return json_string_iter_run_select (self->block->member_name, options); } gboolean gtk_json_parser_get_boolean (GtkJsonParser *self) { if (self->error) return FALSE; if (self->block->value == NULL) return FALSE; if (*self->block->value == 't') return TRUE; else if (*self->block->value == 'f') return FALSE; gtk_json_parser_type_error (self, "Expected a boolean value"); return FALSE; } double gtk_json_parser_get_number (GtkJsonParser *self) { double result; if (self->error) return 0; if (self->block->value == NULL) return 0; if (!strchr ("-0123456789", *self->block->value)) { gtk_json_parser_type_error (self, "Expected a number"); return 0; } errno = 0; result = g_ascii_strtod ((const char *) self->block->value, NULL); if (errno) { if (errno == ERANGE) gtk_json_parser_value_error (self, "Number out of range"); else gtk_json_parser_value_error (self, "%s", g_strerror (errno)); return 0; } return result; } int gtk_json_parser_get_int (GtkJsonParser *self) { long result; char *end; if (self->error) return 0; if (self->block->value == NULL) return 0; if (!strchr ("-0123456789", *self->block->value)) { gtk_json_parser_type_error 
(self, "Expected an intereger"); return 0; } errno = 0; result = strtol ((const char *) self->block->value, &end, 10); if (*end == '.' || *end == 'e' || *end == 'E') { gtk_json_parser_type_error (self, "Expected an intereger"); return 0; } if (errno) { if (errno == ERANGE) gtk_json_parser_value_error (self, "Number out of integer range"); else gtk_json_parser_value_error (self, "%s", g_strerror (errno)); return 0; } else if (result > G_MAXINT || result < G_MININT) { gtk_json_parser_value_error (self, "Number out of integer range"); return 0; } return result; } guint gtk_json_parser_get_uint (GtkJsonParser *self) { gulong result; char *end; if (self->error) return 0; if (self->block->value == NULL) return 0; if (!strchr ("0123456789", *self->block->value)) { gtk_json_parser_type_error (self, "Expected an unsigned intereger"); return 0; } errno = 0; result = strtoul ((const char *) self->block->value, &end, 10); if (*end == '.' || *end == 'e' || *end == 'E') { gtk_json_parser_type_error (self, "Expected an unsigned intereger"); return 0; } if (errno) { if (errno == ERANGE) gtk_json_parser_value_error (self, "Number out of unsignedinteger range"); else gtk_json_parser_value_error (self, "%s", g_strerror (errno)); return 0; } else if (result > G_MAXUINT) { gtk_json_parser_value_error (self, "Number out of unsigned integer range"); return 0; } return result; } char * gtk_json_parser_get_string (GtkJsonParser *self) { if (self->error) return g_strdup (""); if (self->block->value == NULL) return g_strdup (""); if (*self->block->value != '"') { gtk_json_parser_type_error (self, "Expected a string"); return g_strdup (""); } return gtk_json_unescape_string (self->block->value); } gssize gtk_json_parser_select_string (GtkJsonParser *self, const char * const *options) { if (self->error) return -1; if (self->block->value == NULL) return -1; if (*self->block->value != '"') { gtk_json_parser_type_error (self, "Expected a string"); return -1; } return json_string_iter_run_select 
(self->block->value, options);
}

/*
 * Enters the object at the read head: consumes '{', pushes an object
 * block and, unless the object is empty, validates the first member's
 * name and value.
 *
 * Returns: TRUE on success, including for an empty object; FALSE if an
 *   error is set or gets recorded
 */
gboolean
gtk_json_parser_start_object (GtkJsonParser *self)
{
  if (self->error)
    return FALSE;

  if (!gtk_json_parser_try_char (self, '{'))
    {
      gtk_json_parser_type_error (self, "Expected an object");
      return FALSE;
    }

  gtk_json_parser_push_block (self, GTK_JSON_BLOCK_OBJECT);

  gtk_json_parser_skip_whitespace (self);
  if (gtk_json_parser_is_eof (self))
    {
      gtk_json_parser_syntax_error_at (self, self->block[-1].value, self->reader, "Unterminated object");
      return FALSE;
    }
  /* empty object: the new block keeps member_name and value NULL */
  if (gtk_json_parser_has_char (self, '}'))
    return TRUE;

  if (!gtk_json_parser_has_char (self, '"'))
    {
      gtk_json_parser_syntax_error (self, "Expected a string for object member name");
      return FALSE;
    }
  self->block->member_name = self->reader;

  if (!gtk_json_parser_parse_string (self))
    return FALSE;
  gtk_json_parser_skip_whitespace (self);
  if (!gtk_json_parser_try_char (self, ':'))
    {
      gtk_json_parser_syntax_error (self, "Missing ':' after member name");
      return FALSE;
    }

  gtk_json_parser_skip_whitespace (self);
  self->block->value = self->reader;
  if (!gtk_json_parser_parse_value (self))
    return FALSE;

  return TRUE;
}

/*
 * Enters the array at the read head: consumes '[', pushes an array
 * block and, unless the array is empty, validates the first element.
 *
 * Returns: TRUE on success, including for an empty array; FALSE if an
 *   error is set or gets recorded
 */
gboolean
gtk_json_parser_start_array (GtkJsonParser *self)
{
  if (self->error)
    return FALSE;

  if (!gtk_json_parser_try_char (self, '['))
    {
      gtk_json_parser_type_error (self, "Expected an array");
      return FALSE;
    }

  gtk_json_parser_push_block (self, GTK_JSON_BLOCK_ARRAY);
  gtk_json_parser_skip_whitespace (self);
  if (gtk_json_parser_is_eof (self))
    {
      gtk_json_parser_syntax_error_at (self, self->block[-1].value, self->reader, "Unterminated array");
      return FALSE;
    }
  if (gtk_json_parser_has_char (self, ']'))
    {
      /* empty array: there is no current value */
      self->block->value = NULL;
      return TRUE;
    }
  self->block->value = self->reader;
  if (!gtk_json_parser_parse_value (self))
    return FALSE;

  return TRUE;
}

/*
 * Leaves the current object or array: skips all remaining values,
 * consumes the closing bracket and pops the block. Must not be called
 * on the toplevel block.
 *
 * Returns: TRUE on success; FALSE if an error is set or gets recorded
 */
gboolean
gtk_json_parser_end (GtkJsonParser *self)
{
  char bracket;

  g_return_val_if_fail (self != NULL, FALSE);

  /* skip whatever is left in this block */
  while (gtk_json_parser_next (self));

  if (self->error)
    return FALSE;

  switch (self->block->type)
    {
    case GTK_JSON_BLOCK_OBJECT:
      bracket = '}';
      break;
    case GTK_JSON_BLOCK_ARRAY:
      bracket = ']';
      break;
    case GTK_JSON_BLOCK_TOPLEVEL:
    default:
      g_return_val_if_reached (FALSE);
    }

  if (!gtk_json_parser_try_char (self, bracket))
    {
      gtk_json_parser_syntax_error (self, "No terminating '%c'", bracket);
      return FALSE;
    }

  gtk_json_parser_pop_block (self);

  return TRUE;
}