diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/coding.c | 22 | ||||
-rw-r--r-- | src/coding.h | 1 | ||||
-rw-r--r-- | src/json.c | 33 |
3 files changed, 48 insertions, 8 deletions
diff --git a/src/coding.c b/src/coding.c index 1705838ffad..5ea1e395f20 100644 --- a/src/coding.c +++ b/src/coding.c @@ -6360,6 +6360,27 @@ check_utf_8 (struct coding_system *coding) } +/* Return whether STRING is a valid UTF-8 string. STRING must be a + unibyte string. */ + +bool +utf8_string_p (Lisp_Object string) +{ + eassert (!STRING_MULTIBYTE (string)); + struct coding_system coding; + setup_coding_system (Qutf_8_unix, &coding); + /* We initialize only the fields that check_utf_8 accesses. */ + coding.head_ascii = -1; + coding.src_pos = 0; + coding.src_pos_byte = 0; + coding.src_chars = SCHARS (string); + coding.src_bytes = SBYTES (string); + coding.src_object = string; + coding.eol_seen = EOL_SEEN_NONE; + return check_utf_8 (&coding) != -1; +} + + /* Detect how end-of-line of a text of length SRC_BYTES pointed by SOURCE is encoded. If CATEGORY is one of coding_category_utf_16_XXXX, assume that CR and LF are encoded by @@ -10846,6 +10867,7 @@ syms_of_coding (void) DEFSYM (Qiso_2022, "iso-2022"); DEFSYM (Qutf_8, "utf-8"); + DEFSYM (Qutf_8_unix, "utf-8-unix"); DEFSYM (Qutf_8_emacs, "utf-8-emacs"); #if defined (WINDOWSNT) || defined (CYGWIN) diff --git a/src/coding.h b/src/coding.h index 66d125b07e6..bc4ef52e1ed 100644 --- a/src/coding.h +++ b/src/coding.h @@ -665,6 +665,7 @@ struct coding_system /* Extern declarations. */ extern Lisp_Object code_conversion_save (bool, bool); extern bool encode_coding_utf_8 (struct coding_system *); +extern bool utf8_string_p (Lisp_Object); extern void setup_coding_system (Lisp_Object, struct coding_system *); extern Lisp_Object coding_charset_list (struct coding_system *); extern Lisp_Object coding_system_charset_list (Lisp_Object); diff --git a/src/json.c b/src/json.c index 88db86ad2e3..93dcc730dae 100644 --- a/src/json.c +++ b/src/json.c @@ -316,6 +316,15 @@ json_check (json_t *object) return object; } +/* If STRING is not a valid UTF-8 string, signal an error of type + `wrong-type-argument'. STRING must be a unibyte string. */ + +static void +json_check_utf8 (Lisp_Object string) +{ + CHECK_TYPE (utf8_string_p (string), Qutf_8_string_p, string); +} + static json_t *lisp_to_json (Lisp_Object); /* Convert a Lisp object to a toplevel JSON object (array or object). @@ -363,9 +372,12 @@ lisp_to_json_toplevel_1 (Lisp_Object lisp, json_t **json) int status = json_object_set_new (*json, key_str, lisp_to_json (HASH_VALUE (h, i))); if (status == -1) - /* FIXME: A failure here might also indicate that the - key is not a valid Unicode string. */ - json_out_of_memory (); + { + /* A failure can be caused either by an invalid key or + by low memory. */ + json_check_utf8 (key); + json_out_of_memory (); + } } clear_unwind_protect (count); return unbind_to (count, Qnil); @@ -447,9 +459,15 @@ lisp_to_json (Lisp_Object lisp) else if (STRINGP (lisp)) { Lisp_Object encoded = json_encode (lisp); - /* FIXME: We might throw an out-of-memory error here if the - string is not valid Unicode. */ - return json_check (json_stringn (SSDATA (encoded), SBYTES (encoded))); + json_t *json = json_stringn (SSDATA (encoded), SBYTES (encoded)); + if (json == NULL) + { + /* A failure can be caused either by an invalid string or by + low memory. */ + json_check_utf8 (encoded); + json_out_of_memory (); + } + return json; } /* LISP now must be a vector, hashtable, or alist. */ @@ -863,8 +881,7 @@ syms_of_json (void) DEFSYM (Qstring_without_embedded_nulls_p, "string-without-embedded-nulls-p"); DEFSYM (Qjson_value_p, "json-value-p"); - - DEFSYM (Qutf_8_unix, "utf-8-unix"); + DEFSYM (Qutf_8_string_p, "utf-8-string-p"); DEFSYM (Qjson_error, "json-error"); DEFSYM (Qjson_out_of_memory, "json-out-of-memory"); |